diff --git a/codes/A2C/agent.py b/codes/A2C/agent.py index 997401b..bd26785 100644 --- a/codes/A2C/agent.py +++ b/codes/A2C/agent.py @@ -40,10 +40,10 @@ class ActorCritic(nn.Module): class A2C: ''' A2C算法 ''' - def __init__(self,state_dim,action_dim,cfg) -> None: + def __init__(self,n_states,n_actions,cfg) -> None: self.gamma = cfg.gamma self.device = cfg.device - self.model = ActorCritic(state_dim, action_dim, cfg.hidden_size).to(self.device) + self.model = ActorCritic(n_states, n_actions, cfg.hidden_size).to(self.device) self.optimizer = optim.Adam(self.model.parameters()) def compute_returns(self,next_value, rewards, masks): diff --git a/codes/A2C/task0.py b/codes/A2C/task0.py index fd54d87..e0296ed 100644 --- a/codes/A2C/task0.py +++ b/codes/A2C/task0.py @@ -74,9 +74,9 @@ def train(cfg,envs): print(f'环境:{cfg.env_name}, 算法:{cfg.algo}, 设备:{cfg.device}') env = gym.make(cfg.env_name) # a single env env.seed(10) - state_dim = envs.observation_space.shape[0] - action_dim = envs.action_space.n - model = ActorCritic(state_dim, action_dim, cfg.hidden_dim).to(cfg.device) + n_states = envs.observation_space.shape[0] + n_actions = envs.action_space.n + model = ActorCritic(n_states, n_actions, cfg.hidden_dim).to(cfg.device) optimizer = optim.Adam(model.parameters()) frame_idx = 0 test_rewards = [] diff --git a/codes/DDPG/env.py b/codes/DDPG/env.py index 92fe482..89445cf 100644 --- a/codes/DDPG/env.py +++ b/codes/DDPG/env.py @@ -39,15 +39,15 @@ class OUNoise(object): self.max_sigma = max_sigma self.min_sigma = min_sigma self.decay_period = decay_period - self.action_dim = action_space.shape[0] + self.n_actions = action_space.shape[0] self.low = action_space.low self.high = action_space.high self.reset() def reset(self): - self.obs = np.ones(self.action_dim) * self.mu + self.obs = np.ones(self.n_actions) * self.mu def evolve_obs(self): x = self.obs - dx = self.theta * (self.mu - x) + self.sigma * np.random.randn(self.action_dim) + dx = self.theta * (self.mu - x) + self.sigma * np.random.randn(self.n_actions) self.obs = x + dx return self.obs def get_action(self, action, t=0): diff --git a/codes/DQN/README.md b/codes/DQN/README.md index fc82fe6..33e7397 100644 --- a/codes/DQN/README.md +++ b/codes/DQN/README.md @@ -50,15 +50,15 @@ import torch.nn as nn import torch.nn.functional as F class FCN(nn.Module): - def __init__(self, state_dim=4, action_dim=18): + def __init__(self, n_states=4, n_actions=18): """ 初始化q网络,为全连接网络 - state_dim: 输入的feature即环境的state数目 - action_dim: 输出的action总个数 + n_states: 输入的feature即环境的state数目 + n_actions: 输出的action总个数 """ super(FCN, self).__init__() - self.fc1 = nn.Linear(state_dim, 128) # 输入层 + self.fc1 = nn.Linear(n_states, 128) # 输入层 self.fc2 = nn.Linear(128, 128) # 隐藏层 - self.fc3 = nn.Linear(128, action_dim) # 输出层 + self.fc3 = nn.Linear(128, n_actions) # 输出层 def forward(self, x): # 各层对应的激活函数 @@ -66,7 +66,7 @@ class FCN(nn.Module): x = F.relu(self.fc2(x)) return self.fc3(x) ``` -输入为state_dim,输出为action_dim,包含一个128维度的隐藏层,这里根据需要可增加隐藏层维度和数量,然后一般使用relu激活函数,这里跟深度学习的网路设置是一样的。 +输入为n_states,输出为n_actions,包含一个128维度的隐藏层,这里根据需要可增加隐藏层维度和数量,然后一般使用relu激活函数,这里跟深度学习的网路设置是一样的。 ### Replay Buffer @@ -107,8 +107,8 @@ class ReplayBuffer: 在类中建立两个网络,以及optimizer和memory, ```python -self.policy_net = MLP(state_dim, action_dim,hidden_dim=cfg.hidden_dim).to(self.device) -self.target_net = MLP(state_dim, action_dim,hidden_dim=cfg.hidden_dim).to(self.device) +self.policy_net = MLP(n_states, n_actions,hidden_dim=cfg.hidden_dim).to(self.device) +self.target_net = MLP(n_states, 
n_actions,hidden_dim=cfg.hidden_dim).to(self.device) for target_param, param in zip(self.target_net.parameters(),self.policy_net.parameters()): # copy params from policy net target_param.data.copy_(param.data) self.optimizer = optim.Adam(self.policy_net.parameters(), lr=cfg.lr) @@ -124,7 +124,7 @@ def choose_action(self, state): if random.random() > self.epsilon(self.frame_idx): action = self.predict(state) else: - action = random.randrange(self.action_dim) + action = random.randrange(self.n_actions) return action ``` diff --git a/codes/DQN/agent.py b/codes/DQN/dqn.py similarity index 87% rename from codes/DQN/agent.py rename to codes/DQN/dqn.py index 2e1e5de..e36f1d7 100644 --- a/codes/DQN/agent.py +++ b/codes/DQN/dqn.py @@ -5,7 +5,7 @@ @Email: johnjim0816@gmail.com @Date: 2020-06-12 00:50:49 @LastEditor: John -LastEditTime: 2021-09-15 13:35:36 +LastEditTime: 2021-12-22 14:01:37 @Discription: @Environment: python 3.7.7 ''' @@ -21,15 +21,15 @@ import math import numpy as np class MLP(nn.Module): - def __init__(self, state_dim,action_dim,hidden_dim=128): + def __init__(self, n_states,n_actions,hidden_dim=128): """ 初始化q网络,为全连接网络 - state_dim: 输入的特征数即环境的状态数 - action_dim: 输出的动作维度 + n_states: 输入的特征数即环境的状态数 + n_actions: 输出的动作维度 """ super(MLP, self).__init__() - self.fc1 = nn.Linear(state_dim, hidden_dim) # 输入层 + self.fc1 = nn.Linear(n_states, hidden_dim) # 输入层 self.fc2 = nn.Linear(hidden_dim,hidden_dim) # 隐藏层 - self.fc3 = nn.Linear(hidden_dim, action_dim) # 输出层 + self.fc3 = nn.Linear(hidden_dim, n_actions) # 输出层 def forward(self, x): # 各层对应的激活函数 @@ -62,9 +62,9 @@ class ReplayBuffer: return len(self.buffer) class DQN: - def __init__(self, state_dim, action_dim, cfg): + def __init__(self, n_states, n_actions, cfg): - self.action_dim = action_dim # 总的动作个数 + self.n_actions = n_actions # 总的动作个数 self.device = cfg.device # 设备,cpu或gpu等 self.gamma = cfg.gamma # 奖励的折扣因子 # e-greedy策略相关参数 @@ -73,8 +73,8 @@ class DQN: (cfg.epsilon_start - cfg.epsilon_end) * \ math.exp(-1. 
* frame_idx / cfg.epsilon_decay) self.batch_size = cfg.batch_size - self.policy_net = MLP(state_dim, action_dim,hidden_dim=cfg.hidden_dim).to(self.device) - self.target_net = MLP(state_dim, action_dim,hidden_dim=cfg.hidden_dim).to(self.device) + self.policy_net = MLP(n_states, n_actions,hidden_dim=cfg.hidden_dim).to(self.device) + self.target_net = MLP(n_states, n_actions,hidden_dim=cfg.hidden_dim).to(self.device) for target_param, param in zip(self.target_net.parameters(),self.policy_net.parameters()): # 复制参数到目标网路targe_net target_param.data.copy_(param.data) self.optimizer = optim.Adam(self.policy_net.parameters(), lr=cfg.lr) # 优化器 @@ -90,7 +90,7 @@ class DQN: q_values = self.policy_net(state) action = q_values.max(1)[1].item() # 选择Q值最大的动作 else: - action = random.randrange(self.action_dim) + action = random.randrange(self.n_actions) return action def update(self): if len(self.memory) < self.batch_size: # 当memory中不满足一个批量时,不更新策略 diff --git a/codes/DQN/dqn_cnn.py b/codes/DQN/dqn_cnn.py new file mode 100644 index 0000000..0f4302c --- /dev/null +++ b/codes/DQN/dqn_cnn.py @@ -0,0 +1,133 @@ +import torch +import torch.nn as nn +import torch.optim as optim +import torch.autograd as autograd +import random +import math +class CNN(nn.Module): + def __init__(self, input_dim, output_dim): + super(CNN, self).__init__() + + self.input_dim = input_dim + self.output_dim = output_dim + + self.features = nn.Sequential( + nn.Conv2d(input_dim[0], 32, kernel_size=8, stride=4), + nn.ReLU(), + nn.Conv2d(32, 64, kernel_size=4, stride=2), + nn.ReLU(), + nn.Conv2d(64, 64, kernel_size=3, stride=1), + nn.ReLU() + ) + + self.fc = nn.Sequential( + nn.Linear(self.feature_size(), 512), + nn.ReLU(), + nn.Linear(512, self.output_dim) + ) + + def forward(self, x): + x = self.features(x) + x = x.view(x.size(0), -1) + x = self.fc(x) + return x + + def feature_size(self): + return self.features(autograd.Variable(torch.zeros(1, *self.input_dim))).view(1, -1).size(1) + + + def act(self, state, epsilon): + if random.random() > epsilon: + state = Variable(torch.FloatTensor(np.float32(state)).unsqueeze(0), volatile=True) + q_value = self.forward(state) + action = q_value.max(1)[1].data[0] + else: + action = random.randrange(env.action_space.n) + return action + +class ReplayBuffer: + def __init__(self, capacity): + self.capacity = capacity # 经验回放的容量 + self.buffer = [] # 缓冲区 + self.position = 0 + + def push(self, state, action, reward, next_state, done): + ''' 缓冲区是一个队列,容量超出时去掉开始存入的转移(transition) + ''' + if len(self.buffer) < self.capacity: + self.buffer.append(None) + self.buffer[self.position] = (state, action, reward, next_state, done) + self.position = (self.position + 1) % self.capacity + + def sample(self, batch_size): + batch = random.sample(self.buffer, batch_size) # 随机采出小批量转移 + state, action, reward, next_state, done = zip(*batch) # 解压成状态,动作等 + return state, action, reward, next_state, done + + def __len__(self): + ''' 返回当前存储的量 + ''' + return len(self.buffer) + +class DQN: + def __init__(self, n_states, n_actions, cfg): + + self.n_actions = n_actions # 总的动作个数 + self.device = cfg.device # 设备,cpu或gpu等 + self.gamma = cfg.gamma # 奖励的折扣因子 + # e-greedy策略相关参数 + self.frame_idx = 0 # 用于epsilon的衰减计数 + self.epsilon = lambda frame_idx: cfg.epsilon_end + \ + (cfg.epsilon_start - cfg.epsilon_end) * \ + math.exp(-1. 
* frame_idx / cfg.epsilon_decay) + self.batch_size = cfg.batch_size + self.policy_net = CNN(n_states, n_actions).to(self.device) + self.target_net = CNN(n_states, n_actions).to(self.device) + for target_param, param in zip(self.target_net.parameters(),self.policy_net.parameters()): # 复制参数到目标网路targe_net + target_param.data.copy_(param.data) + self.optimizer = optim.Adam(self.policy_net.parameters(), lr=cfg.lr) # 优化器 + self.memory = ReplayBuffer(cfg.memory_capacity) # 经验回放 + + def choose_action(self, state): + ''' 选择动作 + ''' + self.frame_idx += 1 + if random.random() > self.epsilon(self.frame_idx): + with torch.no_grad(): + state = torch.tensor([state], device=self.device, dtype=torch.float32) + q_values = self.policy_net(state) + action = q_values.max(1)[1].item() # 选择Q值最大的动作 + else: + action = random.randrange(self.n_actions) + return action + def update(self): + if len(self.memory) < self.batch_size: # 当memory中不满足一个批量时,不更新策略 + return + # 从经验回放中(replay memory)中随机采样一个批量的转移(transition) + state_batch, action_batch, reward_batch, next_state_batch, done_batch = self.memory.sample( + self.batch_size) + # 转为张量 + state_batch = torch.tensor(state_batch, device=self.device, dtype=torch.float) + action_batch = torch.tensor(action_batch, device=self.device).unsqueeze(1) + reward_batch = torch.tensor(reward_batch, device=self.device, dtype=torch.float) + next_state_batch = torch.tensor(next_state_batch, device=self.device, dtype=torch.float) + done_batch = torch.tensor(np.float32(done_batch), device=self.device) + q_values = self.policy_net(state_batch).gather(dim=1, index=action_batch) # 计算当前状态(s_t,a)对应的Q(s_t, a) + next_q_values = self.target_net(next_state_batch).max(1)[0].detach() # 计算下一时刻的状态(s_t_,a)对应的Q值 + # 计算期望的Q值,对于终止状态,此时done_batch[0]=1, 对应的expected_q_value等于reward + expected_q_values = reward_batch + self.gamma * next_q_values * (1-done_batch) + loss = nn.MSELoss()(q_values, expected_q_values.unsqueeze(1)) # 计算均方根损失 + # 优化更新模型 + self.optimizer.zero_grad() + loss.backward() + for param in self.policy_net.parameters(): # clip防止梯度爆炸 + param.grad.data.clamp_(-1, 1) + self.optimizer.step() + + def save(self, path): + torch.save(self.target_net.state_dict(), path+'dqn_checkpoint.pth') + + def load(self, path): + self.target_net.load_state_dict(torch.load(path+'dqn_checkpoint.pth')) + for target_param, param in zip(self.target_net.parameters(), self.policy_net.parameters()): + param.data.copy_(target_param.data) \ No newline at end of file diff --git a/codes/DQN/task0.py b/codes/DQN/task0.py index 7c20144..937f412 100644 --- a/codes/DQN/task0.py +++ b/codes/DQN/task0.py @@ -9,11 +9,10 @@ import torch import datetime from common.utils import save_results, make_dir from common.utils import plot_rewards -from DQN.agent import DQN -from DQN.train import train,test +from DQN.dqn import DQN curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # 获取当前时间 -algo_name = "DQN" # 算法名称 +algo_name = 'DQN' # 算法名称 env_name = 'CartPole-v0' # 环境名称 class DQNConfig: @@ -51,25 +50,82 @@ def env_agent_config(cfg, seed=1): ''' env = gym.make(cfg.env_name) # 创建环境 env.seed(seed) # 设置随机种子 - state_dim = env.observation_space.shape[0] # 状态数 - action_dim = env.action_space.n # 动作数 - agent = DQN(state_dim, action_dim, cfg) # 创建智能体 + n_states = env.observation_space.shape[0] # 状态数 + n_actions = env.action_space.n # 动作数 + agent = DQN(n_states, n_actions, cfg) # 创建智能体 return env, agent +def train(cfg, env, agent): + ''' 训练 + ''' + print('开始训练!') + print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}') + rewards = [] 
# 记录所有回合的奖励 + ma_rewards = [] # 记录所有回合的滑动平均奖励 + for i_ep in range(cfg.train_eps): + ep_reward = 0 # 记录一回合内的奖励 + state = env.reset() # 重置环境,返回初始状态 + while True: + action = agent.choose_action(state) # 选择动作 + next_state, reward, done, _ = env.step(action) # 更新环境,返回transition + agent.memory.push(state, action, reward, next_state, done) # 保存transition + state = next_state # 更新下一个状态 + agent.update() # 更新智能体 + ep_reward += reward # 累加奖励 + if done: + break + if (i_ep+1) % cfg.target_update == 0: # 智能体目标网络更新 + agent.target_net.load_state_dict(agent.policy_net.state_dict()) + rewards.append(ep_reward) + if ma_rewards: + ma_rewards.append(0.9*ma_rewards[-1]+0.1*ep_reward) + else: + ma_rewards.append(ep_reward) + if (i_ep+1)%10 == 0: + print('回合:{}/{}, 奖励:{}'.format(i_ep+1, cfg.train_eps, ep_reward)) + print('完成训练!') + return rewards, ma_rewards -cfg = DQNConfig() -plot_cfg = PlotConfig() -# 训练 -env, agent = env_agent_config(cfg, seed=1) -rewards, ma_rewards = train(cfg, env, agent) -make_dir(plot_cfg.result_path, plot_cfg.model_path) # 创建保存结果和模型路径的文件夹 -agent.save(path=plot_cfg.model_path) # 保存模型 -save_results(rewards, ma_rewards, tag='train', - path=plot_cfg.result_path) # 保存结果 -plot_rewards(rewards, ma_rewards, plot_cfg, tag="train") # 画出结果 -# 测试 -env, agent = env_agent_config(cfg, seed=10) -agent.load(path=plot_cfg.model_path) # 导入模型 -rewards, ma_rewards = test(cfg, env, agent) -save_results(rewards, ma_rewards, tag='test', path=plot_cfg.result_path) # 保存结果 -plot_rewards(rewards, ma_rewards, plot_cfg, tag="test") # 画出结果 +def test(cfg,env,agent): + print('开始测试!') + print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}') + # 由于测试不需要使用epsilon-greedy策略,所以相应的值设置为0 + cfg.epsilon_start = 0.0 # e-greedy策略中初始epsilon + cfg.epsilon_end = 0.0 # e-greedy策略中的终止epsilon + rewards = [] # 记录所有回合的奖励 + ma_rewards = [] # 记录所有回合的滑动平均奖励 + for i_ep in range(cfg.test_eps): + ep_reward = 0 # 记录一回合内的奖励 + state = env.reset() # 重置环境,返回初始状态 + while True: + action = agent.choose_action(state) # 选择动作 + next_state, reward, done, _ = env.step(action) # 更新环境,返回transition + state = next_state # 更新下一个状态 + ep_reward += reward # 累加奖励 + if done: + break + rewards.append(ep_reward) + if ma_rewards: + ma_rewards.append(ma_rewards[-1]*0.9+ep_reward*0.1) + else: + ma_rewards.append(ep_reward) + print(f"回合:{i_ep+1}/{cfg.test_eps},奖励:{ep_reward:.1f}") + print('完成测试!') + return rewards,ma_rewards +if __name__ == "__main__": + cfg = DQNConfig() + plot_cfg = PlotConfig() + # 训练 + env, agent = env_agent_config(cfg, seed=1) + rewards, ma_rewards = train(cfg, env, agent) + make_dir(plot_cfg.result_path, plot_cfg.model_path) # 创建保存结果和模型路径的文件夹 + agent.save(path=plot_cfg.model_path) # 保存模型 + save_results(rewards, ma_rewards, tag='train', + path=plot_cfg.result_path) # 保存结果 + plot_rewards(rewards, ma_rewards, plot_cfg, tag="train") # 画出结果 + # 测试 + env, agent = env_agent_config(cfg, seed=10) + agent.load(path=plot_cfg.model_path) # 导入模型 + rewards, ma_rewards = test(cfg, env, agent) + save_results(rewards, ma_rewards, tag='test', path=plot_cfg.result_path) # 保存结果 + plot_rewards(rewards, ma_rewards, plot_cfg, tag="test") # 画出结果 diff --git a/codes/DQN/task1.py b/codes/DQN/task1.py index cf93829..ac9e559 100644 --- a/codes/DQN/task1.py +++ b/codes/DQN/task1.py @@ -1,3 +1,13 @@ +#!/usr/bin/env python +# coding=utf-8 +''' +Author: JiangJi +Email: johnjim0816@gmail.com +Date: 2021-12-22 11:14:17 +LastEditor: JiangJi +LastEditTime: 2021-12-22 11:40:44 +Discription: 使用 Nature DQN 训练 CartPole-v1 +''' import sys import os curr_path = 
os.path.dirname(os.path.abspath(__file__)) # 当前文件所在绝对路径 @@ -9,9 +19,7 @@ import torch import datetime from common.utils import save_results, make_dir from common.utils import plot_rewards, plot_rewards_cn -from DQN.agent import DQN -from DQN.train import train,test - +from DQN.dqn import DQN curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # 获取当前时间 algo_name = "DQN" # 算法名称 @@ -58,26 +66,83 @@ def env_agent_config(cfg, seed=1): ''' env = gym.make(cfg.env_name) # 创建环境 env.seed(seed) # 设置随机种子 - state_dim = env.observation_space.shape[0] # 状态数 - action_dim = env.action_space.n # 动作数 - agent = DQN(state_dim, action_dim, cfg) # 创建智能体 + n_states = env.observation_space.shape[0] # 状态数 + n_actions = env.action_space.n # 动作数 + agent = DQN(n_states, n_actions, cfg) # 创建智能体 return env, agent +def train(cfg, env, agent): + ''' 训练 + ''' + print('开始训练!') + print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}') + rewards = [] # 记录所有回合的奖励 + ma_rewards = [] # 记录所有回合的滑动平均奖励 + for i_ep in range(cfg.train_eps): + ep_reward = 0 # 记录一回合内的奖励 + state = env.reset() # 重置环境,返回初始状态 + while True: + action = agent.choose_action(state) # 选择动作 + next_state, reward, done, _ = env.step(action) # 更新环境,返回transition + agent.memory.push(state, action, reward, next_state, done) # 保存transition + state = next_state # 更新下一个状态 + agent.update() # 更新智能体 + ep_reward += reward # 累加奖励 + if done: + break + if (i_ep+1) % cfg.target_update == 0: # 智能体目标网络更新 + agent.target_net.load_state_dict(agent.policy_net.state_dict()) + rewards.append(ep_reward) + if ma_rewards: + ma_rewards.append(0.9*ma_rewards[-1]+0.1*ep_reward) + else: + ma_rewards.append(ep_reward) + if (i_ep+1)%10 == 0: + print('回合:{}/{}, 奖励:{}'.format(i_ep+1, cfg.train_eps, ep_reward)) + print('完成训练!') + return rewards, ma_rewards -cfg = DQNConfig() -plot_cfg = PlotConfig() -# 训练 -env, agent = env_agent_config(cfg, seed=1) -rewards, ma_rewards = train(cfg, env, agent) -make_dir(plot_cfg.result_path, plot_cfg.model_path) # 创建保存结果和模型路径的文件夹 -agent.save(path=plot_cfg.model_path) # 保存模型 -save_results(rewards, ma_rewards, tag='train', - path=plot_cfg.result_path) # 保存结果 -plot_rewards_cn(rewards, ma_rewards, plot_cfg, tag="train") # 画出结果 -# 测试 -env, agent = env_agent_config(cfg, seed=10) -agent.load(path=plot_cfg.model_path) # 导入模型 -rewards, ma_rewards = test(cfg, env, agent) -save_results(rewards, ma_rewards, tag='test', - path=plot_cfg.result_path) # 保存结果 -plot_rewards_cn(rewards, ma_rewards, plot_cfg, tag="test") # 画出结果 +def test(cfg,env,agent): + print('开始测试!') + print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}') + # 由于测试不需要使用epsilon-greedy策略,所以相应的值设置为0 + cfg.epsilon_start = 0.0 # e-greedy策略中初始epsilon + cfg.epsilon_end = 0.0 # e-greedy策略中的终止epsilon + rewards = [] # 记录所有回合的奖励 + ma_rewards = [] # 记录所有回合的滑动平均奖励 + for i_ep in range(cfg.test_eps): + ep_reward = 0 # 记录一回合内的奖励 + state = env.reset() # 重置环境,返回初始状态 + while True: + action = agent.choose_action(state) # 选择动作 + next_state, reward, done, _ = env.step(action) # 更新环境,返回transition + state = next_state # 更新下一个状态 + ep_reward += reward # 累加奖励 + if done: + break + rewards.append(ep_reward) + if ma_rewards: + ma_rewards.append(ma_rewards[-1]*0.9+ep_reward*0.1) + else: + ma_rewards.append(ep_reward) + print(f"回合:{i_ep+1}/{cfg.test_eps},奖励:{ep_reward:.1f}") + print('完成测试!') + return rewards,ma_rewards +if __name__ == "__main__": + cfg = DQNConfig() + plot_cfg = PlotConfig() + # 训练 + env, agent = env_agent_config(cfg, seed=1) + rewards, ma_rewards = train(cfg, env, agent) + make_dir(plot_cfg.result_path, 
plot_cfg.model_path) # 创建保存结果和模型路径的文件夹 + agent.save(path=plot_cfg.model_path) # 保存模型 + save_results(rewards, ma_rewards, tag='train', + path=plot_cfg.result_path) # 保存结果 + plot_rewards_cn(rewards, ma_rewards, plot_cfg, tag="train") # 画出结果 + # 测试 + env, agent = env_agent_config(cfg, seed=10) + agent.load(path=plot_cfg.model_path) # 导入模型 + rewards, ma_rewards = test(cfg, env, agent) + save_results(rewards, ma_rewards, tag='test', + path=plot_cfg.result_path) # 保存结果 + plot_rewards_cn(rewards, ma_rewards, plot_cfg, tag="test") # 画出结果 diff --git a/codes/DQN/task2.py b/codes/DQN/task2.py new file mode 100644 index 0000000..8e2de34 --- /dev/null +++ b/codes/DQN/task2.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python +# coding=utf-8 +''' +Author: JiangJi +Email: johnjim0816@gmail.com +Date: 2021-12-22 11:14:17 +LastEditor: JiangJi +LastEditTime: 2021-12-22 15:27:48 +Discription: 使用 DQN-cnn 训练 PongNoFrameskip-v4 +''' +import sys +import os +curr_path = os.path.dirname(os.path.abspath(__file__)) # 当前文件所在绝对路径 +parent_path = os.path.dirname(curr_path) # 父路径 +sys.path.append(parent_path) # 添加路径到系统路径 + +import gym +import torch +import datetime +from common.utils import save_results, make_dir +from common.utils import plot_rewards, plot_rewards_cn +from common.atari_wrappers import make_atari, wrap_deepmind +from DQN.dqn import DQN + +curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # 获取当前时间 +algo_name = 'DQN-cnn' # 算法名称 +env_name = 'PongNoFrameskip-v4' # 环境名称 +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # 检测GPU +class DQNConfig: + ''' 算法相关参数设置 + ''' + + def __init__(self): + self.algo_name = algo_name # 算法名称 + self.env_name = env_name # 环境名称 + self.device = device # 检测GPU + self.train_eps = 500 # 训练的回合数 + self.test_eps = 30 # 测试的回合数 + # 超参数 + self.gamma = 0.95 # 强化学习中的折扣因子 + self.epsilon_start = 0.90 # e-greedy策略中初始epsilon + self.epsilon_end = 0.01 # e-greedy策略中的终止epsilon + self.epsilon_decay = 500 # e-greedy策略中epsilon的衰减率 + self.lr = 0.0001 # 学习率 + self.memory_capacity = 100000 # 经验回放的容量 + self.batch_size = 64 # mini-batch SGD中的批量大小 + self.target_update = 4 # 目标网络的更新频率 + self.hidden_dim = 256 # 网络隐藏层 +class PlotConfig: + ''' 绘图相关参数设置 + ''' + + def __init__(self) -> None: + self.algo_name = algo_name # 算法名称 + self.env_name = env_name # 环境名称 + self.device = device # 检测GPU + self.result_path = curr_path + "/outputs/" + self.env_name + \ + '/' + curr_time + '/results/' # 保存结果的路径 + self.model_path = curr_path + "/outputs/" + self.env_name + \ + '/' + curr_time + '/models/' # 保存模型的路径 + self.save = True # 是否保存图片 + + +def env_agent_config(cfg, seed=1): + ''' 创建环境和智能体 + ''' + env = make_atari(cfg.env_name) # 创建环境 + # env = wrap_deepmind(env) + # env = wrap_pytorch(env) + env.seed(seed) # 设置随机种子 + n_states = env.observation_space.shape[0] # 状态数 + n_actions = env.action_space.n # 动作数 + agent = DQN(n_states, n_actions, cfg) # 创建智能体 + return env, agent + +def train(cfg, env, agent): + ''' 训练 + ''' + print('开始训练!') + print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}') + rewards = [] # 记录所有回合的奖励 + ma_rewards = [] # 记录所有回合的滑动平均奖励 + for i_ep in range(cfg.train_eps): + ep_reward = 0 # 记录一回合内的奖励 + state = env.reset() # 重置环境,返回初始状态 + while True: + action = agent.choose_action(state) # 选择动作 + next_state, reward, done, _ = env.step(action) # 更新环境,返回transition + agent.memory.push(state, action, reward, next_state, done) # 保存transition + state = next_state # 更新下一个状态 + agent.update() # 更新智能体 + ep_reward += reward # 累加奖励 + if done: + break + if (i_ep+1) % cfg.target_update == 0: # 
智能体目标网络更新 + agent.target_net.load_state_dict(agent.policy_net.state_dict()) + rewards.append(ep_reward) + if ma_rewards: + ma_rewards.append(0.9*ma_rewards[-1]+0.1*ep_reward) + else: + ma_rewards.append(ep_reward) + if (i_ep+1)%10 == 0: + print('回合:{}/{}, 奖励:{}'.format(i_ep+1, cfg.train_eps, ep_reward)) + print('完成训练!') + return rewards, ma_rewards + +def test(cfg,env,agent): + print('开始测试!') + print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}') + # 由于测试不需要使用epsilon-greedy策略,所以相应的值设置为0 + cfg.epsilon_start = 0.0 # e-greedy策略中初始epsilon + cfg.epsilon_end = 0.0 # e-greedy策略中的终止epsilon + rewards = [] # 记录所有回合的奖励 + ma_rewards = [] # 记录所有回合的滑动平均奖励 + for i_ep in range(cfg.test_eps): + ep_reward = 0 # 记录一回合内的奖励 + state = env.reset() # 重置环境,返回初始状态 + while True: + action = agent.choose_action(state) # 选择动作 + next_state, reward, done, _ = env.step(action) # 更新环境,返回transition + state = next_state # 更新下一个状态 + ep_reward += reward # 累加奖励 + if done: + break + rewards.append(ep_reward) + if ma_rewards: + ma_rewards.append(ma_rewards[-1]*0.9+ep_reward*0.1) + else: + ma_rewards.append(ep_reward) + print(f"回合:{i_ep+1}/{cfg.test_eps},奖励:{ep_reward:.1f}") + print('完成测试!') + return rewards,ma_rewards +if __name__ == "__main__": + cfg = DQNConfig() + plot_cfg = PlotConfig() + # 训练 + env, agent = env_agent_config(cfg, seed=1) + rewards, ma_rewards = train(cfg, env, agent) + make_dir(plot_cfg.result_path, plot_cfg.model_path) # 创建保存结果和模型路径的文件夹 + agent.save(path=plot_cfg.model_path) # 保存模型 + save_results(rewards, ma_rewards, tag='train', + path=plot_cfg.result_path) # 保存结果 + plot_rewards_cn(rewards, ma_rewards, plot_cfg, tag="train") # 画出结果 + # 测试 + env, agent = env_agent_config(cfg, seed=10) + agent.load(path=plot_cfg.model_path) # 导入模型 + rewards, ma_rewards = test(cfg, env, agent) + save_results(rewards, ma_rewards, tag='test', + path=plot_cfg.result_path) # 保存结果 + plot_rewards_cn(rewards, ma_rewards, plot_cfg, tag="test") # 画出结果 diff --git a/codes/DQN/train.ipynb b/codes/DQN/train.ipynb deleted file mode 100644 index 2529826..0000000 --- a/codes/DQN/train.ipynb +++ /dev/null @@ -1,423 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "from pathlib import Path\n", - "curr_path = str(Path().absolute()) # 当前路径\n", - "parent_path = str(Path().absolute().parent) # 父路径\n", - "sys.path.append(parent_path) # 添加路径到系统路径\n", - "\n", - "import math,random\n", - "import gym\n", - "import torch\n", - "import torch.nn as nn\n", - "import torch.optim as optim\n", - "import torch.nn.functional as F\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "from IPython.display import clear_output # 清空单元格输出区域" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 网络模型" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "class MLP(nn.Module):\n", - " def __init__(self, state_dim,action_dim,hidden_dim=128):\n", - " \"\"\" 初始化q网络,为全连接网络\n", - " state_dim: 输入的特征数即环境的状态数\n", - " action_dim: 输出的动作维度\n", - " \"\"\"\n", - " super(MLP, self).__init__()\n", - " self.fc1 = nn.Linear(state_dim, hidden_dim) # 输入层\n", - " self.fc2 = nn.Linear(hidden_dim,hidden_dim) # 隐藏层\n", - " self.fc3 = nn.Linear(hidden_dim, action_dim) # 输出层\n", - " \n", - " def forward(self, x):\n", - " # 各层对应的激活函数\n", - " x = F.relu(self.fc1(x)) \n", - " x = F.relu(self.fc2(x))\n", - " return self.fc3(x)" - ] - }, - { - "cell_type": 
"markdown", - "metadata": {}, - "source": [ - "## 经验回放" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "class ReplayBuffer:\n", - " def __init__(self, capacity):\n", - " self.capacity = capacity # 经验回放的容量\n", - " self.buffer = [] # 缓冲区\n", - " self.position = 0 \n", - " \n", - " def push(self, state, action, reward, next_state, done):\n", - " ''' 缓冲区是一个队列,容量超出时去掉开始存入的转移(transition)\n", - " '''\n", - " if len(self.buffer) < self.capacity:\n", - " self.buffer.append(None)\n", - " self.buffer[self.position] = (state, action, reward, next_state, done)\n", - " self.position = (self.position + 1) % self.capacity \n", - " \n", - " def sample(self, batch_size):\n", - " batch = random.sample(self.buffer, batch_size) # 随机采出小批量转移\n", - " state, action, reward, next_state, done = zip(*batch) # 解压成状态,动作等\n", - " return state, action, reward, next_state, done\n", - " \n", - " def __len__(self):\n", - " ''' 返回当前存储的量\n", - " '''\n", - " return len(self.buffer)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## DQN" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "class DQN:\n", - " def __init__(self, state_dim, action_dim, cfg):\n", - "\n", - " self.action_dim = action_dim # 总的动作个数\n", - " self.device = cfg.device # 设备,cpu或gpu等\n", - " self.gamma = cfg.gamma # 奖励的折扣因子\n", - " # e-greedy策略相关参数\n", - " self.frame_idx = 0 # 用于epsilon的衰减计数\n", - " self.epsilon = lambda frame_idx: cfg.epsilon_end + \\\n", - " (cfg.epsilon_start - cfg.epsilon_end) * \\\n", - " math.exp(-1. * frame_idx / cfg.epsilon_decay)\n", - " self.batch_size = cfg.batch_size\n", - " self.policy_net = MLP(state_dim, action_dim,hidden_dim=cfg.hidden_dim).to(self.device)\n", - " self.target_net = MLP(state_dim, action_dim,hidden_dim=cfg.hidden_dim).to(self.device)\n", - " for target_param, param in zip(self.target_net.parameters(),self.policy_net.parameters()): # 复制参数到目标网路targe_net\n", - " target_param.data.copy_(param.data)\n", - " self.optimizer = optim.Adam(self.policy_net.parameters(), lr=cfg.lr) # 优化器\n", - " self.memory = ReplayBuffer(cfg.memory_capacity) # 经验回放\n", - "\n", - " def choose_action(self, state):\n", - " ''' 选择动作\n", - " '''\n", - " self.frame_idx += 1\n", - " if random.random() > self.epsilon(self.frame_idx):\n", - " with torch.no_grad():\n", - " state = torch.tensor([state], device=self.device, dtype=torch.float32)\n", - " q_values = self.policy_net(state)\n", - " action = q_values.max(1)[1].item() # 选择Q值最大的动作\n", - " else:\n", - " action = random.randrange(self.action_dim)\n", - " return action\n", - " def update(self):\n", - " if len(self.memory) < self.batch_size: # 当memory中不满足一个批量时,不更新策略\n", - " return\n", - " # 从经验回放中(replay memory)中随机采样一个批量的转移(transition)\n", - " state_batch, action_batch, reward_batch, next_state_batch, done_batch = self.memory.sample(\n", - " self.batch_size)\n", - " # 转为张量\n", - " state_batch = torch.tensor(state_batch, device=self.device, dtype=torch.float)\n", - " action_batch = torch.tensor(action_batch, device=self.device).unsqueeze(1) \n", - " reward_batch = torch.tensor(reward_batch, device=self.device, dtype=torch.float) \n", - " next_state_batch = torch.tensor(next_state_batch, device=self.device, dtype=torch.float)\n", - " done_batch = torch.tensor(np.float32(done_batch), device=self.device)\n", - " q_values = self.policy_net(state_batch).gather(dim=1, index=action_batch) # 计算当前状态(s_t,a)对应的Q(s_t, a)\n", - " next_q_values = 
self.target_net(next_state_batch).max(1)[0].detach() # 计算下一时刻的状态(s_t_,a)对应的Q值\n", - " # 计算期望的Q值,对于终止状态,此时done_batch[0]=1, 对应的expected_q_value等于reward\n", - " expected_q_values = reward_batch + self.gamma * next_q_values * (1-done_batch)\n", - " loss = nn.MSELoss()(q_values, expected_q_values.unsqueeze(1)) # 计算均方根损失\n", - " # 优化更新模型\n", - " self.optimizer.zero_grad() \n", - " loss.backward()\n", - " for param in self.policy_net.parameters(): # clip防止梯度爆炸\n", - " param.grad.data.clamp_(-1, 1)\n", - " self.optimizer.step()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### DQN参数" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "class DQNConfig:\n", - " def __init__(self):\n", - " self.algo = \"DQN\" # 算法名称\n", - " self.env = 'CartPole-v0' # 环境名称\n", - " self.train_eps = 200 # 训练的回合数\n", - " self.test_eps = 20 # 测试的回合数\n", - " self.gamma = 0.95 # 强化学习中的折扣因子\n", - " self.epsilon_start = 0.90 # e-greedy策略中初始epsilon\n", - " self.epsilon_end = 0.01 # e-greedy策略中的终止epsilon\n", - " self.epsilon_decay = 500 # e-greedy策略中epsilon的衰减率\n", - " self.lr = 0.0001 # 学习率\n", - " self.memory_capacity = 100000 # 经验回放的容量\n", - " self.batch_size = 64 # mini-batch SGD中的批量大小\n", - " self.target_update = 4 # 目标网络的更新频率\n", - " self.device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\") # 检测GPU\n", - " self.hidden_dim = 256 # 网络隐藏层" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 创建环境" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "def env_agent_config(cfg,seed=1):\n", - " ''' 创建环境和智能体\n", - " '''\n", - " env = gym.make(cfg.env) # 创建环境\n", - " env.seed(seed) # 设置随机种子\n", - " state_dim = env.observation_space.shape[0] # 状态数\n", - " action_dim = env.action_space.n # 动作数\n", - " agent = DQN(state_dim,action_dim,cfg) # 创建智能体\n", - " return env,agent" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 训练" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "开始训练!\n", - "环境:CartPole-v0, 算法:DQN, 设备:cuda\n", - "回合:10/200, 奖励:12.0\n", - "回合:20/200, 奖励:16.0\n", - "回合:30/200, 奖励:15.0\n", - "回合:40/200, 奖励:14.0\n", - "回合:50/200, 奖励:13.0\n", - "回合:60/200, 奖励:27.0\n", - "回合:70/200, 奖励:36.0\n", - "回合:80/200, 奖励:33.0\n", - "回合:90/200, 奖励:200.0\n", - "回合:100/200, 奖励:200.0\n", - "回合:110/200, 奖励:200.0\n", - "回合:120/200, 奖励:200.0\n", - "回合:130/200, 奖励:200.0\n", - "回合:140/200, 奖励:200.0\n", - "回合:150/200, 奖励:200.0\n", - "回合:160/200, 奖励:200.0\n", - "回合:170/200, 奖励:200.0\n", - "回合:180/200, 奖励:200.0\n", - "回合:190/200, 奖励:200.0\n", - "回合:200/200, 奖励:200.0\n", - "完成训练!\n" - ] - }, - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXsAAAEcCAYAAAAmzxTpAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAABYfklEQVR4nO3deXwU9f348dfMXrlPkhAIgiDEACKBACqHAlZRUbSWSql41m+tFk+8KgVFsUWtWi0WrVZr5SdqRRAEEW/xQJBLBBHCTe773mNmfn9sdsmxuTfJJnk/Hw8eJDvXezaT937yns98PophGAZCCCG6NbWzAxBCCNH+JNkLIUQPIMleCCF6AEn2QgjRA0iyF0KIHkCSvRBC9ACS7GuYMmUKX3/9dYcfd+vWrVx44YUdflxx0vHjx0lOTsblcvl93xs3buTcc88lNTWVPXv2+H3/bVFVVcXNN9/M6NGjue222zo7nHZ3//338/TTT3d2GJ1Ckn0ASEtLY8OGDZ0dhmgnS5Ys4c9//jPbt29n6NCh9ZYnJyczcuRIUlNTGTduHNdeey3r1q2rt96nn37Kr371K0aOHMm4ceOYN28e2dnZ3uUrV64kOTmZf/3rX7W2mzRpEps3b/YZ2wcffEBeXh6bN2/m2WefbeOZupWVlbF48WLOO+88UlNTOf/881m8eDEFBQWt2t/KlSv5zW9+U+u1+++/n+HDh5OamsrYsWO5/vrrSU9P90f4reJwOHjggQcYNWoU48eP55VXXum0WBoiyb4DaJrW2SG0WXc4h86SkZHB4MGDG11n9erVbN++nfXr13PFFVewaNEi/vGPf3iXf/DBB9x9991ce+21fPvtt6xduxaLxcLs2bMpKSnxrhcVFcVLL71EWVlZs2MbMGAAZrO5xefl668gh8PBtddey4EDB3jppZf4/vvvefPNN4mKiuKHH37wyzE8brzxRrZv387nn39OTEwMDzzwQIv37y/PPfccR44c4dNPP+W1117jpZde4osvvui0eHyRZN8AXdd58cUXOf/88xk3bhy33347RUVF3uW33XYb48ePZ/To0fz2t79l//793mX3338/Cxcu5KabbmLkyJFs3ryZKVOm8PLLL3PppZcyevRo7rjjDux2OwCbN29m0qRJ3u0bWxfgX//6FxMmTGDChAm8/fbbJCcnc+TIEZ/nUVRUxAMPPMCECRMYM2YMt9xyC+C7tVRzP3XP4eWXX2b8+PG1kv7GjRu59NJLm/V+1fXWW2/xi1/8grFjx3LzzTfXaqEmJyfzxhtvcMEFF5CWlsbDDz9MQw96a5rGsmXLOP/880lNTeWXv/wlmZmZPssyc+bM4e233/Zut2TJEsaNG8fUqVP5/PPPa+33nXfe4aKLLiI1NZWpU6eyYsWKBs9F13Wef/55Jk+ezNlnn829995LaWkpDoeD1NRUNE1jxowZnH/++Q3uwyMmJobLL7+chx56iBdeeIHCwkIMw2DJkiX84Q9/4NJLLyUoKIi4uDgWL15McHAwr732mnf7gQMHkpqayquvvtrksZ599lmef/551q9fT2pqKm+//XaD5wInS11vv/025513Htdee229fa5evZrMzEz+8Y9/cNppp6GqKrGxsdx6662ce+65AN7rJDU1lYsvvpiNGzd6t1+5ciWzZs3iscceY9y4cdx5550sXLiQHTt2kJqaSlpaWr1jBgcHc+mll3p/B9PT05kzZw5paWlccsklfPzxxw2+B59++ikzZswgLS2NWbNm8dNPP/lc78UXX6xX5nr00Ud59NFHAXj33Xe55ZZbiIyMZNCgQcycOZN33323sbe/w0myb8B///tfPvroI15//XW+/PJLIiMjWbRokXf5pEmT2LBhA9988w1Dhw5l3rx5tbZfu3YtN998M9u2bWP06NEArF+/npdeeomPP/6Yffv2sXLlygaP39C6X3zxBa+++iqvvPIKGzdubPDPc497772XyspK3n//fb7++muuu+66Zr8HNc/h2muvJTg4mG+//da7fM2aNd5k39T7VdM333zD3/72N5555hk2bdpE3759ueuuu2qt89lnn/G///2P9957j/Xr1/Pll1/63Ncrr7zC+++/z4svvsi2bdt47LHHCAoKavLc3nrrLT799FNWrVrFO++8wwcffFBreWxsLC+88ALbtm3jL3/5C3/5y1/48ccffe5r5cqVvPvuu7z22mt89NFHVFRUsGjRIqxWK9u3bwfcSfCjjz5qMi6PqVOnomkau3bt4uDBg2RkZDBt2rRa66iqygUXXMCmTZtqvX777bfzn//8p9EPW3A3WH7/+99z0UUXsX37dmbOnNngudS0ZcsW1q1bx8svv1xvn19//TUTJ04kNDS0weP269eP5cuX8/333/PHP/6Re+65h5ycHO/yXbt20a9fP7766iueeOIJHn74YUaOHMn27dvZunVrvf2Vl5ezZs0aUlJScDqd3HzzzYwfP56vv/6a+fPnM2/ePA4ePFhvuz179vCnP/2JRYsWsXnzZq666ipuueUWHA5HvXUvueQSPv/8c+9fTJqm8cEHHzB9+nSKi4vJzc3l9NNP965/+umnc+DAgQbfg84gyb4BK1as4M4776R3795YrVb++Mc/smHDBm9L8Ve/+hVhYWFYrVbmzp3LTz/95G0BgfuXdfTo0aiqis1mA9wty4SEBKKiopg8eTJ79+5t8PgNrbt+/Xp++ctfMnjwYIKDg5k7d26D+8jJyeGLL77g4YcfJjIyEovFwtixY5v9HtQ9h0suuYS1a9cC7rrsF198wSWXXNKs96umNWvWcOWVVzJs2DCsVit33XUXO3bs4Pjx4951brrpJiIiIujTpw/jxo1rsMX19ttvc/vttzNw4EAUReH0008nOjq6yXNbv3491157LYmJiURFRfH73/++1vLzzjuPU045BUVRGDt2LOPHj/eZaDznc91119GvXz9CQ0O56667WLduXZtu9losFqKjoykuLqawsBCA+Pj4euvFxcV5l3ukpKRwzjnn1KvdN0dzzmXu3LmEhIT4/FAtKioiLi6u0WNcdNFFJCQkoKoqF198Mf3792fXrl3e5fHx8cyZMwez2dzoB/e///1v0tLSuOCCCygvL+evf/0rO3fupKKigv/7v//DarVy9tlnM3nyZN5///1627/55ptcddVVnHnmmZhMJq644gosFgs7duyot27fvn0ZOnSo9wP722+/JSgoiJEjR1JRUQFAeHi4d/3w8HDKy8sbfR86WssLdT1ERkYGt956K6p68vNQVVXy8/Pp1asXTz/9NB988AEFBQXedQoLC70/8MTExHr7rPlLEBwcXKs109x1c3JyGD58uHeZr+N4ZGVlERkZSWRkZFOn61PdfV966aXMmjWLhx9+mI0bNzJ06FD69u0LNP5+JSQk1NpPTk4Ow4YN834fGhpKVFQU2dnZJCUlAfXPv6FfnKysLE455ZQWn1tOTk6t8+vTp0+t5Z9//jlLly7l8OHD6LpOVVUVQ4YMaXBfnvcB3InB5XL5PPfmcjqdFBQUEB
kZ6f3wysnJoV+/frXWy83N9fnhdttttzFz5kyuv/76Fh23sXPx6N27d4PbR0VFkZub2+gxVq1axSuvvMKJEycAqKioqPWB1dj+a7rhhhu48847a722e/duevfuXes67NOnT60yoUdGRgarVq3i9ddf977mdDrJycnhvffeY+HChQCMHj2al156ienTp7N27Vouv/xy1q5dy/Tp0wEICQkB3A0gT8OurKys0b9uOoMk+wb07t2bxx57zFuCqWnVqlV8/PHHvPLKKyQlJVFaWsqYMWMarCv7U3x8fK0LNzMzs8F1e/fuTXFxMSUlJURERNRaFhwcTFVVlff7pn5BAU477TT69OnDF198Ueti9xyroffL1zl4ftHB/cteVFTUqsTYu3dvjh49Wi8Re34Bq6qqCAsLA2qfY1xcXK33rubXDoeD2267jSVLljB16lQsFgu33HJLgz/fuueTkZGB2WwmNja2xefj8fHHH2MymRgxYgRRUVH07t2bDz74gJtuusm7jq7rfPjhh0yZMqXe9oMGDeKCCy5g2bJlLTpuY+eSlZUFgKIoDW5/zjnn8Mwzz1BRUeH9GdR04sQJ5s+fz6uvvkpqaiomk4kZM2bUWqfu/hs7nq/4s7Ky0HXdm/AzMzMZMGBAvXUTExO5+eab+cMf/uBzX5dddlmt7y+66CKWLFlCVlYWGzdu5M033wQgMjKSuLg4fvrpJ8aPHw/ATz/9xGmnndbsuDuClHEa8Jvf/IZnnnnGe+EXFBR4/4QrLy/HarUSHR1NZWUlTz31VIfFNW3aNFauXEl6ejqVlZU8//zzDa4bHx/PpEmTePjhhykuLsbpdLJlyxbAXVPcv38/e/fuxW6389xzzzXr+NOnT+c///kPW7ZsqVVDbuz98rWPlStXsnfvXhwOB0899RQjRozwtupbYubMmfz973/n8OHDGIbBTz/9RGFhITExMSQkJLB69Wo0TeN///sfx44d82530UUX8d///pesrCyKi4t58cUXvcscDgcOh4OYmBjMZjOff/45X331VZPvybFjxygvL+fpp5/moosualUPl6KiIt577z0WLVrETTfdRHR0NIqicN999/HPf/6TNWvWYLfbyc3N5cEHH6SwsJCrr77a575uvfVW3nnnnVrlxaa09VxmzJhB7969mTt3Lunp6ei6TmFhIcuWLePzzz+nsrISRVGIiYkB3DfCa3Zu8CU2Npbs7GyftfS6RowYQVBQEC+99BJOp5PNmzfzySefcPHFF9dbd+bMmaxYsYKdO3diGAYVFRV89tlnDfZkiomJYezYsTzwwAMkJSUxaNAg77LLL7+cf/7znxQXF5Oens7bb7/NFVdc0WS8HUla9g245pprMAyDG264gZycHGJjY7n44os5//zzufzyy9m0aRMTJ04kKiqK22+/nTfeeKND4jr33HOZM2cO11xzDYqicMstt7Bq1SqsVqvP9R9//HH+8pe/cNFFF+F0Ohk3bhxjxozh1FNP5dZbb+W6664jKCiIu+66y9tSacz06dN56qmnmDRpkvcXFhp/v+o655xzuP3225k7dy4lJSWkpqa2+kGX66+/HofDwQ033EBhYSEDBw5k6dKlADzyyCM8/PDDPP300/zqV78iNTXVu92vf/1rDh8+zIwZMwgNDeXGG2/03nwOCwtj/vz53HHHHTgcDiZPnuyz9exx5ZVXkp2dzdVXX43dbmfChAn8+c9/btF5zJgxA0VRsFgsJCcn88ADD3hvfgNcfPHFWK1W/vnPfzJ//nxvWem///2vz1o+uG+Ezpgxo0XXZlvPxWq18uqrr/Lss89yww03UFJSQmxsLFOnTmXEiBFER0dzww03MGvWLBRF4fLLL2fUqFGN7vOss87itNNOY8KECSiK0minBKvVyrJly3j44Yd54YUXSEhI4PHHH6+VmD3OOOMMHnnkERYtWsSRI0cICgpi1KhRPnv8eEyfPp377ruPe+65p9brt912GwsXLmTy5MkEBQVx00031ephFwgUmbyka0tPT2f69On88MMPrWpJiq5p06ZN3H333bz66qukpKR0djiiC5AyThe0ceNGHA4HxcXFPPHEE0yePFkSfQ8zYcIE/vKXv/jsOSKEL9Ky74JuvPFGduzYgclkYsyYMSxcuLDBP+WFEAIk2QshRI8gZRwhhOgBJNkLIUQPIMleCCF6gIDuwlFYWI6ut/yWQmxsGPn5zRvitSNJXC0XqLFJXC0TqHFB4MbWmrhUVSE62vcwDQGd7HXdaFWy92wbiCSulgvU2CSulgnUuCBwY/NnXFLGEUKIHkCSvRBC9AABXcapyzAMCgtzcTiqgIb/vMnJUdF1veMCa6buHZeC1RpEdHRci0YpFEJ0jCaTfWFhIffeey9Hjx7FarXSv39/Fi1aRExMDDt27GDBggXY7Xb69u3LE0884R3WtbFlrVVWVoyiKCQkJKEoDf9RYjaruFyBl1S7c1yGoVNUlEdZWTHh4VH+CUwI4TdNlnEUReF3v/sdGzZsYM2aNfTr148nn3wSXde55557WLBgARs2bCAtLY0nn3wSoNFlbVFZWUZ4eFSjiV50DkVRCQ+PprIy8Ho1CCGakeyjoqIYN26c9/uRI0eSkZHB7t27sdls3uFAZ82a5Z3Hs7FlbaHrGiZTl6o89Sgmkxld15pesZMYhoHur3+6H/clcXWb2DRdb/O/9ir1tihz6rrOG2+8wZQpU8jMzKw1lVtMTAy6rlNUVNTosqioqDYFLPXgwBVIP5t/rfmRuKhgLp84EICcwgoW/nsLdmfgfhh1BAWdYMVJiGInSHFiVVxYFRc2qv+v8ZoVFxZFw4SBSdExUf2vztcqOubq11QMFM8/Be/Xhzn5tVpjGVB7G896ysmvGz8fo873TZ1//f2VNLp97fXVDrjEc4hl0P/9ze/7bVGyf+SRRwgJCeHqq69m48aNfg+mrtjYsFrf5+SomM3NK+E0d72O1l5xLVq0kJSUFGbOnNWq7f0Vl6qqxMWFN71iC7Rmf0dzyrC7DO+2B7PLsDs1LjpnANHhDU9i3VWpmgOrowirowSrsxirw/OvBIurHLOrErOrApNW1WQC9dAVM7pqxlBMGIoJXTVVf61iKObq/y3oSs3XVVBUb9pGofprFUNxp3NNqV4GGJ6vlRqp3lumVaqX19V0Sq/JqLd6x27fUiHx/bzXrT9/l5qd7JcsWcKRI0dYtmwZqqqSmJhIRkaGd7ln4u2oqKhGl7VEfn5ZrYcKdF1v1o3EzrgR6nK5mhxT3l9x+TqWUf3naGv278/3S9d1cnObPw1eU+Liwlu1P7tDo7i0yrvt8Sx3+23ymYn0igzutLjaynA50PIOo+cfQy/K9P4zygvqrasEhaOERqOER6HYklBsoShBYe7/baFgDUYx21AsNvD+b0UxV3+v+q9h0lnvV3MEYmy5uaWtiktVlXqNZI9mJfunnnqK3bt38+KLL3qnvxs+fDhVVVVs3bqVtLQ0VqxY4Z2TtLFl3cmECWlcf/1NfPPNV4wbdzazZ8/hueeeJj19Pw6Hg9TUNObOv
ZMTJ47xpz/dy4oV/8PlcnHJJVO59tobmT37Gj7+eCNffvkZDz20mDfeeJ2PP/4QTXNhtdqYN+9+Bg9O9nmsyy+/kkcfXUh+fh69eyd6J1cGWL16JW+99f+wWKwYhs6iRX+lf/8BnfIedYT0E8X877N07p41ErPJ/T5oukGF3eVdp7TCPX9peIjv6RsDlV5eiJaxFy07HS0nHT3/GBjVpShLEGpUIqY+KahRvVHDexHdty8lziCUkCgUc9c6V9G+mkz2+/fv54UXXmDAgAHMmuUuESQlJbF06VIef/xxFi5cWKt7Jbj/lG9omT999UMmm3Zl1ntdUcBo41PGE0YkMv6MxCbXs9lsvPTSawD89a+PMHLkKO6//8/ous7DD8/n/fff47LLrqCiopy8vFyOHz/BqacOYuvWLcyefQ3ff/8daWljAJg27RJ+8xv35NFbtmzmiSf+wosvvurzWA8+eA9nnpnKDTf8HydOHOe662YzbtzZADz//N9ZvvwdevXqhcPhCMi+/f50KLOEfceKKKt0EhVmA8Cl6VTYT14EpRVObBYTNoups8JsFsMw0HMP4Tq0FdexH9ALqidJtwRhijsV65kXYUoYhBrb391qr1PmCI4LpyzAWqkiMDSZ7AcPHsy+fft8Lhs1ahRr1qxp8bLu5KKLpnu/3rTpC/bu/ZEVK5YDUFVVRXx8AgCjRqWxZct3nDhxghkzfsny5a/hdDrZuvU7rr76OgD27dvLf//7CiUlxaiqyrFjRxs81rZt33PHHe5Jj/v2TfJ+YLiPNYbFixcyfvxEzj57An37JrXLuQcKT6WvZslP04xaN2NLKxyEh1g6OrRm00tycO77Emf6ZoySHFBMmHoPxjr215iThqHG9PNrWUX0PF26H+P4M3y3vjuyZh8cHFLjO4PHHnvSZ3IdPXoMW7e6k/2CBY+wY8c2PvpoA4YBffr0xel08uc/38c//vEvkpNPJy8vl8svv6iRYzXssceeYO/eH/n++63cdtvNzJv3AGefPb4tpxnQPEneVSPZu3Qdh1PHpemYTSqlFc6AS/aGYaBl7MW5eyOuIztAAVOfoVhGTsd86mh3XV0IP5Gmgh+NHz+J11//D5rmblEWFRWRkXECcCf7zZu/obS0lPj4BNLSxvLyyy94W+QOhx1N07x/Caxc+Xajxxo9Oo33338PgIyME2zdugVw37zNyDjB0KHDmTPnOsaOPYv9+33/ZdZd6NU1u7ote4Aqh/tn4U72gVPDdmXspWLVI1S+/zha9gGsqdMJnf0UIZfcg+X0SZLohd916ZZ9oLn99rt5/vlnue6636AoChaLldtuu5s+ffoSH59ASEgoI0aMBNzJPzs7i1Gj3A+ehYaGceONv+emm64hIiKSyZOnNnGseTz66EI++mgDiYl9SE0dDbh7wyxe/BBlZaUoikpCQgI33/zHdj3vzuZJ8lr1/4ZheL+usLsIC7ZQWukgKa7zE6hWcAz7t2+iHd+NEhqDbeJ1WAafIzdTRbuTZN8GmzZtrfV9SEgo8+Y90OD6b731rre8FBMTy5dfbqm1/Le/vZbf/vZa7/dz5lzf4LHi4uL5+9//6fM4zz//UvNOoJvwtOw1zf3eajVa+JVVLgzD6PSWvaG5cGxfg2P7WrAGYTtrFpahUyTJiw4jyV50eZ6W/cmkfzLZV9hdVDk0nC6902r2Wt4Rqj77F3rBccynnY3tnNmoQf598EyIpkiyF12epyHvSfKuGl1NK+0uSiudQOf0sXce+Iaqz/+NYgsl+MLbMfdP7fAYhABJ9qIbMIzaNfuaLftKu6vGA1Ud17I3dB37d2/j3LUeU2IyQeffihoc0WHHF6IuSfaiy/OWcTxdMLWTLfsKu4vSio5t2Ruak6qPnsd1ZDuWoVOxnfMbFFV+1UTnkitQdHland44rjo3aDuyZW9oTio3LkU7ugPbOVdjHX5+ux9TiOaQZC+6PL1eGad2y95S4X6cpL2Tfa1EP+EarEOntOvxhGgJSfaiyzOqc7tWfWO2bs1eVRQsZrVdx8UxdJ2qj5e5E/34OZLoRcCRJ2hFgxYvfoh33nmzs8NoUt0naGv1s7e7KKkeF6c9J1exf/cWrsPfYzv7N1iHNf5AnBCdQZJ9AHO5XE2v1AWP5W91yzg1b9BWVt+gDQ9uv5uzzn1f4tz1AZZhU7GecWG7HUeItujSZRznz1/h3PdFvdcVRfF2x2stS/IkLEMaHzxswoQ0brrpD3z55ecUFxdz330PsnXrd2ze/DUul4tHHlnCgAGnkp+fx0MPPUhFRTl2u4NzzhnPLbfc3uA+WzJG/uuvv9XmMfK//fYrxo7tumPk123Re5K9SVW8D1UlRDdvELmW0vKPUrXpNUx9UrCdPbtdjiGEP3TpZB8IwsLCeeml1/jkk4944IG7eeihx7j55j+yfPl/eO21f7NgwSOEhYWzZMnTRESEUVXl4K67/si3337NWWed43OfLRsjP4+srIw2jZH/yiuv43LpXXaM/IbKOGEhFgpK7JSUOxg3NMHvxzWcdio/WopiCyVo6h9Q1MAeK1/0bF062VuGjPfZ+u7IIY6nTr0AgOTk0wGF8eMnVn+fwueffwq4Byd7/vm/s3v3LgzDID8/n/37f24w2bdkjPzvv/+OzMyMHj1Gfv2Wvfv/8GArx3PLABjYx/8PNNk3v4lRnE3w9PvkgSkR8Lp0sg8EnmkaVVXFaj3ZtU9VVe9Qx2++uZzS0hJefvk1TCYLS5YsxuGwN7jPloyR//33W8jI6Nlj5HuHS/AmffcHfc2ulgN6+zcZVxzciXPPJ1jOuBBznxS/7luI9tCsG7RLlixhypQpJCcn8/PPPwNw/PhxZsyY4f03ZcoUxo4d691mypQpTJs2zbv8yy+/bJ8z6AJKS0uJje2FzWYjNzeHTZs+b/a2MkZ+0/Q6/es9XS89yT4hOpiwYP/1sTdcDvLWv4AS2RvbmCv9tl8h2lOzWvZTp07lmmuu4be//a33taSkJFavXu39fvHixd6E5PHss88yZMgQP4Xadc2cOYs///k+Zs+eSa9e8YwePabpjao1PUZ+SI8fI79uzd5zg9bTA+dUP5dwHNvX4Cpyl29kiGLRVShGC7qtTJkyhWXLltVL4A6Hg0mTJvHyyy8zbNiwRtdtifz8slqzD2VlHaF37/5NbteRNfuW6AlxNfdn1FxxceHkNjGB9nPv7GL7/jxmnjeIi87qz1c/ZPLy+3u5fMKprNp0iNnnD+b8tH5+iUcvyaH8rT8RNvQclHNu8Ms+/ak571dnCNS4IHBja01cqqoQGxvmc5lfavaffPIJCQkJ3kTvMW/ePAzDYPTo0dx1111ERLSshVU36JwcFbO5eY8GNHe9jtbd41JVlbg4/47V3tT+LBb3ZRwUbCUuLpzgkHwABvWPQfnqEGed2ddvMWV/+QKKyUTM5KsxRwTmmPT+fv/9JVDjgsCNzZ9x+SXZv/POO1x5Ze3a5fLly0lMTMThcLB48WIWLVrEk08+2aL91m3Z67rerBZoT2hB+5M/49J13a+tpOa0bqrs7lEt
S0uryM0tpai4EoCkmGAev/kcwiyqX2LSsg9QsfcbrKMvxxwR221agx0hUOOCwI3N3y37NjfnsrOz2bJlC5deemmt1xMTEwF3b5XZs2ezbdu2th4KoM0PS4n201k/m7qjXXq6XppNCrGRQX47jn3LOyjBEVhHXNT0ykIEmDYn+3fffZdzzz2X6Oho72sVFRWUlro/kQzDYN26daSktL17mqqa0LSu+1h/d6dpLtROeLCo7nj2nq6XZtV/JTNX5j60jL1YR16CYrH5bb9CdJRmlXEeffRRPvzwQ/Ly8rj++uuJiori/fffB9zJ/sEHH6y1fn5+PnPnzkXTNHRdZ9CgQSxcuLDNwQYHh1FaWkRUVCyKEpi1757KMHRKSwsJDvb9J2T7Hrt2kve07E0m/w185tj6LkpIFJaUyX7bpxAdqVnJfv78+cyfP9/nsg0bNtR7rV+/fqxatapNgfkSFhZJYWEu2dnHgYZLBqqqBtTj/B7dOy4FqzWIsLBIv8TUEnWfoNVqjI3jD1pOOlrmT9jO+o10tRRdVpd6glZRFGJi4ptcrzvdcOkIgRpXc3nu4ddM+iZV8duQxo6d68EaguX0SX7ZnxCdQWohosvzNcSxv0o4ekkOrsPfYx06GcUa7Jd9CtEZJNmLLq/uHLSaZvjt5qxj90egqFiGyVyyomuTZC+6PKNGkgd3F0x/tOwNlx3nz5swDxiNGhrd9AZCBDBJ9qLL846NY5ws45hNbb+0XenfgaMCi8wnK7oBSfaiy/MOcVxj1Et/9MRx7PkUNaoPpsTkNu9LiM4myV50efW6Xuo6pja27LWC4+i5B7GknNuuE5UL0VEk2Ysur960hJqBuY01e9f+r0ExYT7t7DbHJ0QgkGQvujxfE463pYxj6DrO/V9j6neGTDcoug1J9qLLq9vPXtONNt2g1TL2YFQU+ZzfWIiuSpK96PL83bJ3HvgGrMGYTznTL/EJEQgk2Ysur+5wCa42tOwNzYXr8HbM/UfJODiiW5FkL7q8ukMbt6XrpXZij7tv/cDmzxMsRFcgyV50eUbdmn0bHqpyHtwClmBMScOaXlmILkSSvejy6nW9bOVwCYbuwnVkG+YBqSgmi19jFKKzSbIXXZ5nKP623qDVsvaDvRzzgNH+DE+IgCDJXnR5dUe9dGmtu0HrOrIDVDNmKeGIbqhZk5csWbKEDRs2cOLECdasWcOQIUMAmDJlClarFZvNPSfnvHnzmDhxIgA7duxgwYIF2O12+vbtyxNPPEFsbGw7nYboybw1e63GcAmtaNm7ju7E1Od0FIv/JikXIlA0q/kzdepUli9fTt++feste/bZZ1m9ejWrV6/2Jnpd17nnnntYsGABGzZsIC0tjSeffNK/kQtRzTvhuNH6lr1elIVRnIW5/0h/hydEQGjWb0RaWhqJiYnN3unu3bux2WykpaUBMGvWLD744IPWRShEIwzD8M5GrLXhBq3r6E4AeZBKdFttnoN23rx5GIbB6NGjueuuu4iIiCAzM5M+ffp414mJiUHXdYqKioiKimr2vmNjw1odV1xceKu3bU8SV8s1FptnWGNwJ/64uHA03SA8LKhF55SZswdLryQSBg70S1ydSeJquUCNzZ9xtSnZL1++nMTERBwOB4sXL2bRokV+Ldfk55d5/0RviUCdQFviarmmYnO6TiZ7l6aTm1uKy6XjsDubfU6Gy0HlkT1YUs5r9jaB+p5JXC0XqLG1Ji5VVRpsJLepN46ntGO1Wpk9ezbbtm3zvp6RkeFdr6CgAFVVW9SqF6I5ajYGNM1AN9z/WnKDVss+AJpTeuGIbq3Vyb6iooLSUvenjmEYrFu3jpSUFACGDx9OVVUVW7duBWDFihVMmzbND+EKUZvnpqzZpKDphrdHTktu0GonfgTFhKm3zEgluq9mlXEeffRRPvzwQ/Ly8rj++uuJiopi2bJlzJ07F03T0HWdQYMGsXDhQgBUVeXxxx9n4cKFtbpeCuFvJ5O9SpVDw1Vdw2/JDVrX8R8xJQxCsQa3S4xCBIJmJfv58+czf/78eq+vWrWqwW1GjRrFmjVrWh2YEM3hKeNYzO5k76xO9ma1eS17o6oMPe8I1tGXt1eIQgQEeYJWdGmekr2nbON0Vif7ZrbsXVk/AwamPqe3R3hCBAxJ9qJL87bsq5O93akBNHvCcS1zH5jMmOKb3+VSiK5Ikr3o0jxDJVjM7kvZ4apO9s3sjaNl/YwpfpCMcim6PUn2okvzPDVr9iR7Z/Nv0BqOSvS8I5h6D2m/AIUIEJLsRZem123ZV5dxmnODVstJB0PHlChdLkX3J8ledGn1a/aeG7TNSPaZ+0BRMcUPar8AhQgQkuxFl1a3N463Zt+MMo6WtR819hTpXy96BEn2oksz9IbKOI0ne0PX0XIPSate9BiS7EWXVnO4BKh5g7bxS1svPAEuO6YESfaiZ5BkL7q0ejdom1nG0XLS3etJ/3rRQ0iyF12a5i3jmIAaN2ib6I2j56SDLRQlIqF9AxQiQEiyF12aUT2cvac3jsPZ3Jb9QffDVErL56oVoiuSZC+6NG/N3lxds6+ezKSxJ2gNRyV6YYbcnBU9iiR70aXV7WdfVuEAIMTW8ICuWt5hwMAUf2p7hydEwJBkL7q0ujdoSyqcAIQGNzzWjZ53GAC114B2jU2IQCLJXnRpNScvASgpd2Czmhp9glbLO4ISGoMaHNEhMQoRCJo1ecmSJUvYsGEDJ06cYM2aNQwZMoTCwkLuvfdejh49itVqpX///ixatIiYmBgAkpOTGTJkCGp1r4jHH3+c5GQZg0T4l17noarSCgdhQY1f1nreUUy9+rd7bEIEkma17KdOncry5cvp27ev9zVFUfjd737Hhg0bWLNmDf369ePJJ5+std2KFStYvXo1q1evlkQv2oVepzdOeZWL0KCGSziG045elCklHNHjNCvZp6WlkZiYWOu1qKgoxo0b5/1+5MiRZGRk+Dc6IZpQt2YPTdTr848ChrTsRY/TrDJOU3Rd54033mDKlCm1Xp8zZw6apjFp0iTmzp2L1Wr1x+GE8PKUcWrW6EMbKeNoeUcAUCXZix7GL8n+kUceISQkhKuvvtr72meffUZiYiJlZWXcc889LF26lDvvvLNF+42NDWt1THFx4a3etj1JXC3XWGxhJ0oAiI0N9b4WGx3S4DY5ZRm4QqOI79+vzQ9UBep7JnG1XKDG5s+42pzslyxZwpEjR1i2bJn3ZizgLfuEhYUxc+ZMXnnllRbvOz+/zNtya4m4uHByc0tbvF17k7harqnYioorAKgot3tfM0GD21ScSEeJTiIvr6xd4+osElfLBWpsrYlLVZUGG8lt6nr51FNPsXv3bpYuXVqrRFNcXExVVRUALpeLDRs2kJKS0pZDCeFT3eESAEKDfbdhDF1DLzqBGpPUEaEJEVCa1bJ/9NFH+fDDD8nLy+P6668nKiqKZ555hhdeeIEBAwYwa9YsAJKSkli6dCkHDx5kwYIFKIqCy+UiNTWV22+/vV1PRPRMmu7jBm0DvXH0kmzQXJhi+nVIbEIEkmYl+/nz5zN
//vx6r+/bt8/n+qmpqaxZs6ZtkQnRDN7eODVa9mEN9MbRC44DSMte9EjyBK3o0k4OhNZ0bxy94DgoKmpUos/lQnRnkuxFl2bo9Vv2DfWz1wuOo0YmoJilC7DoeSTZiy7NO+F4M2r2WsFxKeGIHkuSvejSPF1zTaqCp9e8rzKO4azCKMmRZC96LL88VCVEZ/HU7FVFwWRSUBUFq8VUf71C91AekuxFTyXJXnRpNVv2qqo03O2yyJ3sTVF9fS4XoruTMo7o0rwtexVMqtpwT5zCDFBNKBFxHRmeEAFDkr3o0jwte0VRMKlKw33sizLdPXHU+iUeIXoCSfaiS/P0xlGbKONoRZmokdK/XvRckuxFl+Zp2auKQq/IIBJ7hdZbx9Bc7p440X06OjwhAobcoBVdmm4YqNVDFT9w9ShOdsCssU5JNhi6PDkrejRJ9qJL0w0Dz8jaJtX3H6p6USYAapS07EXPJWUc0aXpuoGqNj4JibePfVTvjghJiIAkyV50abqOt4zT4DpFmSihMSiWoA6KSojAI8ledGk1a/YNrlOUKfV60eNJshddmrtm33CyN3QdvVBmpxJCkr3o0gzdoLGSvVGSA5oTkyR70cM1meyXLFnClClTSE5O5ueff/a+fujQIa666iouvPBCrrrqKg4fPtysZUL4k24YKI1ke63gGCADoAnRZLKfOnUqy5cvp2/f2gNILVy4kNmzZ7NhwwZmz57NggULmrVMCH/S9MZr9nrhCUCRB6pEj9dksk9LSyMxsfbNrfz8fPbs2cP06dMBmD59Onv27KGgoKDRZUL4m667R7xscHnBcZSIeBSzrQOjEiLwtOqhqszMTBISEjCZ3INKmUwm4uPjyczMxDCMBpfFxMT4L3IhAKOJ3jhawXGp1wtBgD9BGxsb1upt4+LC/RiJ/0hcLddYbBarGYtF9bmO7rRTWpJD2BkTiWmH8wvU90ziarlAjc2fcbUq2ScmJpKdnY2maZhMJjRNIycnh8TERAzDaHBZS+Xnl3kHumqJuLhwcnNLW7xde5O4Wq6p2CorHei64XMdLe8wGDpVQXF+P79Afc8krpYL1NhaE5eqKg02klvV9TI2NpaUlBTWrl0LwNq1a0lJSSEmJqbRZUL4m27QYD97veA4ID1xhIBmtOwfffRRPvzwQ/Ly8rj++uuJiori/fff56GHHuL+++/n+eefJyIigiVLlni3aWyZEP6kN9IbRys4DiYzakRCB0clROBpMtnPnz+f+fPn13t90KBBvP322z63aWyZEP7U2HAJesFx1Ki+MjuVEMgTtMIPtu/P5V9rfuyUYzc26qVecBw1RiYYFwIk2Qs/+CE9n29/zMYwWn4zva1qjmdfk1FVhlFRJN0uhagmyV60WXG5AwNwaXqHH7uhmr1WeAKQm7NCeEiyF21WWuEEwO7shGRv+B7PXveMiRMtyV4IkGQv/KCkwgGAw6l1+LEbGuJYLzgO1hCU0OgOj0mIQCTJXrRZSbk72ds7Idk3NMSxXnACU0wSShMTmwjRU0iyF23icGpUOdxJvjOSvab7HuJYL8lGjZQ5Z4XwkGQv2sRTrwdwdErN3sBUp/VuuBwYlSUo4bEdHo8QgUqSvWgTT70eOqdlr+v1h0swytzDaathvTo8HiEClSR70Saeej10zg1aX0Mc62X5AChh0rIXwkOSvWiTTm/Z+5iWUC/LA0CVMo4QXpLsRZvUbtk3XbP/8XABR7L8N5ys7qM3jlGWD4oi3S6FqEGSvWiT0gonnipKc1r2/2/jz6z9+rDfjq/5GBtHL81HCYlGUQN6bh4hOpQke9EmJeUOosPd87s2J9nbnRoOl/967fiq2RtleahSrxeiFkn2ok1KKtzJ3qQqzSrjOJw6Tpf/avu+Ji/Ry/Kl26UQdUiyF21SUu4kIsSK1WJqVsve4dRw+nHAtLoDoRm6jlFWKN0uhahDkr1ok5IKB+EhVmwWtcmul7ph4HDpOBsp4+w+mN+ieYfrjo1jVBSCoUm3SyHqaNMdrOPHj3Prrbd6vy8tLaWsrIzvvvuOKVOmYLVasdnc9dx58+YxceLEtkUrAopuGJRWOIgIbV7L3pPkG0r2GXnlPPXWTm69Yjijk+ObF0Od3ji654EqKeMIUUubkn1SUhKrV6/2fr948WI07eQv/LPPPsuQIUPacggRwCrtLgwDwoIt2CymJmv2npZ/Q8m+0u4CIK+4qtkx1B3i2CjJAUCRMo4QtfitjONwOFizZg1XXnmlv3YpApwnuVvNKrZmtOw96zeU7D2vF5bam3V8u0PD4dSwWU/OMatl7wdLsAyCJkQdfuuI/Mknn5CQkMCwYcO8r82bNw/DMBg9ejR33XUXERERLdpnbGxYq+OJiwtv9bbtqTvFpVXPBxgTHUJYiJVKh6vR/VRV53iXbvhc71h+JQAVDq3W8ob2uXl3JppucPaIvt51juUeIPiUFOITIlt8Pi3VnX6WHSFQ44LAjc2fcfkt2b/zzju1WvXLly8nMTERh8PB4sWLWbRoEU8++WSL9pmfX9aim3UecXHh5Ob67ylNf+lucWXnlQNQVelAwaC8wtnofrJz3MscTs3nenn5ZQBk5Zd7lzcW2xfbjhNsMxEfYSU3txS9sgRn3nGUgWe3+/vc3X6W7S1Q44LAja01camq0mAj2S9lnOzsbLZs2cKll17qfS0xMREAq9XK7Nmz2bZtmz8OJQKIZ85Zs0nFajE12RvHXqNm72tyck+XzKJmlHF0w2Bneh7DTo3FbHJfxlrWz+54EpObfxJC9BB+Sfbvvvsu5557LtHR7rFIKioqKC11fyIZhsG6detISUnxx6FEAPHU2C1mFZtFxd7Ew1KOGstdmo9kX72/ojK7zw+Dmo5ml1Jc5uDMQSd73WiZ+8BkRe01oLmnIESP4ZcyzrvvvsuDDz7o/T4/P5+5c+eiaRq6rjNo0CAWLlzoj0OJAOJJzs1t2dfsreN06VjMtdsanpa9SzMorXQ/rNWQn44UAXDGwNrJ3pQwCMUkY+IIUZdffis2bNhQ6/t+/fqxatUqf+xaBDBPGcfi6Y3jcJdnGpr3teaHga+naGv20ikqtTea7CvsLlRFITzEAoBemoeefxTr2F+16lyE6O7kCVrRat4yTnXLXjcMtEZuqNccAM3X+DiuGssLmqjbO5waVovq/WBxHdrijmXg2OafgBA9iCR70Wqe1rnZrGKrLsk01te+5jJffe3rtuwb43DpWC0n+9c7D25Bje2PGtG8J2+F6Gkk2YtWq3mD1lr9YFNjT9E6mkr2mo6igELTD1Y5nBrW6g8YvSwfPecg5oFjWnoKQvQYcidLtJqnZW8xuWv20HjLvu4N2nr7q26tB1lNFJY1new9x3Qd+8Edx6mjW3YCQvQg0rIXreaq2bI3e1r2jSR7V9Mte4tJJTrM1swyjvvydU9DqKJEJrT4HIToKSTZi1ar1bK3Nl2zr9Wyb6A3jsWsEh1uIyO/vNFJTtxlHPcHjF5ehBIShaLI5SxEQ+S3Q7Sap2VvNivexNt4sm+8Ze9yuVv2547sQ0GJnTc/Od
DgvuxOzXuD1qgoRAmJas0pCNFjSLIXrebUdFRFwaSerNk3doPW7tLx9MBvsIxjVhkxqBcXju3HJ9tO8MOBPJ/7cjhrlHHKi1BDo9p0LkJ0d5LsRas5XTpmszt9exJvUy37kCCzd1vf+3PvZ8aEUwHYe7jA577sNcs40rIXokmS7EWruVwGlupByE627BtP9qHB7ideG6vZAwRZzUSEWsnKL/e9L5eOzaJiuBxgL0cJjW7TuQjR3UmyF63m1DRvcvZMIGJvrJ+9Syc0qDrZN9IbxyM+Kpis/Arf+6qu2RsVxQCo0rIXolGS7EWrOV2Gd3jh5t6gDQ32lHF8D5dQc3C0uKggMn207A3D8Nbs9YpCAGnZC9EESfai1Tw3VAHMJgVVUZoo47SsZR8XFUx+cSV2h8bL7+/hSJZ72GxNN9ANA6vZhFFeBCA1eyGaIMletJqnqySAoijYrCbsjkbGxnFp2CwqFrPaZM0e3MneMGDrvhy++iGL7ftzgZP3BdxlHHfLXso4QjROkr1otZotewCbRaWqiZa91WzCYlIb7o1Ts2YfHQzApl2ZAOQVVwEn7wtYLSp6eRGYzGALbfP5CNGdSbIXrVa3JW6zmpvsjWOzmrCY1VrDGTe0v/god7Lfd6wIOJnsPcMu2MwmjIoilJDoBsfQF0K4SbIXrebSarfEbRaVqgbKOC5NR9MNrGZ3Gcfh6wnaOn8pRIRavb18APKLK4GTD25ZLSpGRZGUcIRohjaPejllyhSsVis2mw2AefPmMXHiRHbs2MGCBQuw2+307duXJ554gtjY2Cb2JroSp0vHEnoyOQc1MjWhp2xjtbhb9g2VcWome0VR6B0TwpGsUnrHhJBTWImm67Vq9np5IabYU/x5WkJ0S35p2T/77LOsXr2a1atXM3HiRHRd55577mHBggVs2LCBtLQ0nnzySX8cSjThaHYp3+7J6pBj1a2xW62mBlv2NRN0zZq906Wz5qtD2J0amm7U2h9A71h3Lf6c4b3RDYPCErt3X0FaOUZZHkpYjN/PTYjupl3KOLt378Zms5GWlgbArFmz+OCDD9rjUKKOj78/zv/buL9DjlW37BJkMTXYz97uadmba/fG2XeskHe/PMTug/kA9SYhP/uMRM4alsDAPhGAu27v2VfkwQ9BN7Cefp5fz0uI7sgvk5fMmzcPwzAYPXo0d911F5mZmfTp08e7PCYmBl3XKSoqIioqqtn7jY0Na3VMcXHhrd62PbV3XIrqTqQtPU5r4tJ0g/Awm3fbiPAgDmeX+dxXhcs9N21cbBghwVZ0wyAuLhzlmPsJWK16iLToyOBa20+NC2fqmFPIzHM/XOUwICjYSrxaTPDRr4kYdQG9Bg9ucez+0FOvsdYK1LggcGPzZ1xtTvbLly8nMTERh8PB4sWLWbRoEb/4xS/8ERv5+WXojUxg3ZC4uHByc0v9EoM/dURcJWV2HA6NnJySZvdQaW1cDqeGy6md3FbXqaxykptbyo4DefTtFUpcdY+arBz3OlWVdgxDp6LSRW5uKRnZJQAcr/7fXr19zdiy9u3FvvVdZocWUbanCr3vSK4I2YJhsqINvbhTftY9+RprjUCNCwI3ttbEpapKg43kNpdxEhMTAbBarcyePZtt27aRmJhIRkaGd52CggJUVW1Rq160jsOlYQAureUfki1Vr+tljTLOstW7+WDzUQB2H8zncJY7mdftZ19W4QSgqNQB4K3ZG4aB8/A2ctY8R8XKheiZP3GG9ThnHH+LU/a9zlBrBpx5GWpwRLufpxDdQZta9hUVFWiaRnh4OIZhsG7dOlJSUhg+fDhVVVVs3bqVtLQ0VqxYwbRp0/wVs2iEJ9k6XFq9+rc/GYZR7watzWrCpRlU2l04nDq51V0lX1yzh/JKd1K3WkxYLSZvzb6s+vXicney98Ts2Lkex3dvoQaFYRkyHuuYX/GP/+3jQudGBhft4YQrmv7Dprbb+QnR3bQp2efn5zN37lw0TUPXdQYNGsTChQtRVZXHH3+chQsX1up6Kdqfpw+6exya9juOphsYUK9lD1BUPVl4fnEVFVVOb0IHd994i0nFVf1gVKkn2VdvYzGrONM34/juLcyDxtHv13eTVz3yZWxkCG+emMSsPv15c18Ij1mt7XeCQnQzbUr2/fr1Y9WqVT6XjRo1ijVr1rRl96IVPN0SG5u/1R88ZRhLnZY9QEHpyWSfXehu3Z85KJZ9x4qICrPV6mdfVuFu0Xs+IIIdBVRteQVTwmCCzvsdinryoarYyCC2/ORkf/g4CpTjqKo8NStEc/mlN44IHCfLOA2PK+8PLs9k4z5a9oUldm8MBzPctforJg2kb1woJrV210tPq7+0womKTvyP/w8UlaCpN6OYLLWO2SsyCE03yC6swGaRh7+FaAn5jelmapZx2pO3ZV+nnz1AYWmV97W9R9yjUsZFBWNSVe82nu09ZRwDGG45hrX4CEHjr0YNq/+0dWykuy6VkV/RrvcjhOiO5Demm/EMEtbuZZzqlrnZdLKUYq1TxgHYd7SQ8BALwbaTf0RaTCouzUDXDW9vHIBxtnT0oEjMg8b5PGavSHc3zpzCCqwWk891hBC+SRmnG9F03dvlst3LON6W/cmke7JlfzLZl1e5GNTnZPdIx48fMSQnndPM4ZSW24mklHNDfmKnox9DLSdw9r+gVp2+ptgI9/hLhnFyZiwhRPNIsu9GapZu2r2Mo/m4QVud7Auqa/aeyUziqselN5x27N++SV/NydwIcK7fx90RWYSpds4L2uPeycBzGjymxWwiMtRKcblDavZCtJD8xnQjNUecdLRzGcdVPfyB2XyyjOPpjVNYWoXVopJQneTjqssvruM/gObkwOA5vFF+NlQUUaoHsy7sStKd8ex0nIIpqnejx+1VXbeXMo4QLSPJvhux1yjd+BpC2J889wR8tezLq1yEBlmIjXAnZs+MU67D28AWSkVsMt/aB7N9+N0sKZmOHn86z5ZO499l5zV549Vzk9YqN2iFaBH5jelGHDWGF25sxih/8N6gNdfvZw8QYjN7b6jGRQVj6C5cR3diPmUkFou7S2VhmRMDld4xwd7tmp3spWUvRItIzb4bsbtqlnHau2XvLuPUbNlbzSoK7m6UIUFmeseGoCiQEB2Mc89nYC/HfOoogjR3os6oHskyPibEuw+LqfFk7/kAsUrNXogWkWTfjdS+QdveLfvqMk6dmaWs1TdlQ2xmJpyRyKnxQVg3v4I9/VtMicmYk85gsKZiNavsTM/DpCr0qi73mFSlyadiPaUhadkL0TLSPOpGak4c0t41e5ePlj2c7H4ZalNRi44Rv/0lXOnfYk37JcGX3IdithJsMzMqOQ7DgLBgC6HB7rKOuRl1eM8NWpt0vRSiRaRl343U7o3TQV0v6yRom8VElFrO9LzVVKwsdg99cN5NWIaMr7Xe+OGJfPtjNmEhFkKqH7hqqoQD7pq9qigEB8mlK0RLyG9MN9KhZRxX7Ru0hqOC8ncWMsdkwRJWjk2vIui8mzD1Od3n0Acp/aOJDrcREWJFVRWCbeZmDYFgs5i45
...(remainder of the base64-encoded PNG output omitted: the training learning-curve plot of rewards and moving-average rewards)...", - "text/plain": [
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "def train(cfg, env, agent):\n", - " ''' 训练\n", - " '''\n", - " print('开始训练!')\n", - " print(f'环境:{cfg.env}, 算法:{cfg.algo}, 设备:{cfg.device}')\n", - " rewards = [] # 记录所有回合的奖励\n", - " ma_rewards = [] # 记录所有回合的滑动平均奖励\n", - " for i_ep in range(cfg.train_eps):\n", - " ep_reward = 0 # 记录一回合内的奖励\n", - " state = env.reset() # 重置环境,返回初始状态\n", - " while True:\n", - " action = agent.choose_action(state) # 选择动作\n", - " next_state, reward, done, _ = env.step(action) # 更新环境,返回transition\n", - " agent.memory.push(state, action, reward, next_state, done) # 保存transition\n", - " state = next_state # 更新下一个状态\n", - " agent.update() # 更新智能体\n", - " ep_reward += reward # 累加奖励\n", - " if done:\n", - " break\n", - " if (i_ep+1) % cfg.target_update == 0: # 智能体目标网络更新\n", - " agent.target_net.load_state_dict(agent.policy_net.state_dict())\n", - " if (i_ep+1)%10 == 0: \n", - " print('回合:{}/{}, 奖励:{}'.format(i_ep+1, cfg.train_eps, ep_reward))\n", - " rewards.append(ep_reward)\n", - " if ma_rewards:\n", - " ma_rewards.append(0.9*ma_rewards[-1]+0.1*ep_reward)\n", - " else:\n", - " ma_rewards.append(ep_reward)\n", - " print('完成训练!')\n", - " return rewards, ma_rewards\n", - "\n", - "def plot_rewards(rewards,ma_rewards,plot_cfg):\n", - " # clear_output(True) # 清空单元格输出区域,因为多次打印,每次需要清楚前面打印的图片\n", - " sns.set() \n", - " plt.figure() # 创建一个图形实例,方便同时多画几个图\n", - " plt.title(\"learning curve on {} of {} for {}\".format(plot_cfg.device, plot_cfg.algo, plot_cfg.env))\n", - " plt.xlabel('epsiodes')\n", - " plt.plot(rewards,label='rewards')\n", - " plt.plot(ma_rewards,label='ma rewards')\n", - " plt.legend()\n", - " plt.show()\n", - "\n", - "class PlotConfig:\n", - " def __init__(self) -> None:\n", - " self.algo = \"DQN\" # 算法名称\n", - " self.env = 'CartPole-v0' # 环境名称\n", - " self.device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\") # 检测GPU\n", - "\n", - "cfg = DQNConfig()\n", - "plot_cfg = PlotConfig()\n", - "env,agent = env_agent_config(cfg,seed=1)\n", - "rewards, ma_rewards = train(cfg, env, agent)\n", - "plot_rewards(rewards, ma_rewards, plot_cfg) # 画出结果" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "开始测试!\n", - "环境:CartPole-v0, 算法:DQN, 设备:cuda\n", - "回合:3/20, 奖励:200.0\n", - "回合:6/20, 奖励:200.0\n", - "回合:9/20, 奖励:200.0\n", - "回合:12/20, 奖励:200.0\n", - "回合:15/20, 奖励:200.0\n", - "回合:18/20, 奖励:200.0\n", - "完成测试!\n" - ] - }, - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYYAAAEcCAYAAADDfRPAAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAA6xElEQVR4nO3deVxU9f748dcMMiqhIoY4KIktmqYGOkmJS+KGKaF1vZK55HpNc0sUvJkoaoYZmkYhV65tXjWvC+GGaK5Zpqm5Vl5LNPZNXGGAOb8/+DlfT2wDg0D2fj4ePh7OOZ/zOe/PmQ+8z/mcw/loFEVREEIIIf4/bXUHIIQQomaRxCCEEEJFEoMQQggVSQxCCCFUJDEIIYRQkcQghBBCRRJDBXh7e3PkyJEq3+/x48fp27dvle9X/J/ff/+dVq1akZ+fX+l1x8XF0b17dzw8PDh//nyl12+NnJwcJkyYQMeOHZkyZUp1h3PfBQUFsWzZsuoOo9pIYvgTMRgMxMbGVncY4j4JDQ3l7bff5uTJk7Rp06bI+latWuHu7o6Hhweenp6MHDmSHTt2FCm3b98+/va3v+Hu7o6npycBAQGkpKSY12/evJlWrVrxr3/9S7Vdt27dOHr0aLGx7dq1i/T0dI4ePcqKFSusbGmhmzdvsmjRIp5//nk8PDzo1asXixYtIjMzs0L1bd68mVdeeUW1LCgoiLZt2+Lh4UGnTp0YNWoUly5dqozwK8RoNDJ79mw6dOiAl5cXa9asqbZYSiOJoQYpKCio7hCs9iC0obokJibyxBNPlFomOjqakydPsnPnTgYNGkRISAgffvihef2uXbuYMWMGI0eO5LvvvmPbtm3Y2toydOhQrl+/bi7n4ODA6tWruXnzpsWxubm5UatWrXK3q7irK6PRyMiRI/nf//7H6tWr+eGHH9iwYQMODg6cOXOmUvZx15gxYzh58iQHDhzA0dGR2bNnl7v+yrJy5Uri4+PZt28fn332GatXr+bgwYPVFk9JJDFYyWQyERkZSa9evfD09GTq1Klcu3bNvH7KlCl4eXnRsWNHXn31VS5evGheFxQURHBwMOPGjcPd3Z2jR4/i7e1NVFQUvr6+dOzYkWnTppGbmwvA0aNH6datm3n70soC/Otf/6JLly506dKFjRs30qpVK+Lj44ttx7Vr15g9ezZdunThmWeeYeLEiUDxZ2H31vPHNkRFReHl5aVKEHFxcfj6+lp0vP7oyy+/pHfv3nTq1IkJEyaoznxbtWrFunXr6NOnDwaDgfnz51PSH/IXFBQQERFBr1698PDw4KWXXiIpKanYoaHhw4ezceNG83ahoaF4enrSs2dPDhw4oKp306ZN9OvXDw8PD3r27Mn69etLbIvJZOKjjz6iR48ePPfcc8yaNYsbN25gNBrx8PCgoKAAPz8/evXqVWIddzk6OjJw4EDmzZvHqlWryMrKQlEUQkNDef311/H19aVOnTo4OTmxaNEi6taty2effWbe/tFHH8XDw4NPPvmkzH2tWLGCjz76iJ07d+Lh4cHGjRtLbAv833Dbxo0bef755xk5cmSROqOjo0lKSuLDDz/k8ccfR6vV0qhRIyZNmkT37t0BzP3Ew8ODF154gbi4OPP2mzdvxt/fn3feeQdPT0+mT59OcHAwp06dwsPDA4PBUGSfdevWxdfX1/wzeOnSJYYPH47BYKB///7s3bu3xGOwb98+/Pz8MBgM+Pv789NPPxVbLjIysshQ28KFC1m4cCEAW7ZsYeLEiTRo0IDHHnuMwYMHs2XLltIOf7WQxGClzz//nD179vDFF19w6NAhGjRoQEhIiHl9t27diI2N5dtvv6VNmzYEBASott+2bRsTJkzgxIkTdOzYEYCdO3eyevVq9u7dy88//8zmzZtL3H9JZQ8ePMgnn3zCmjVriIuLK3GI4K5Zs2Zx584dtm/fzpEjR3jttdcsPgb3tmHkyJHUrVuX7777zrw+JibGnBjKOl73+vbbb3n//fdZvnw5hw8fpmnTprz55puqMvv37+e///0vX331FTt37uTQoUPF1rVmzRq2b99OZGQkJ06c4J133qFOnTpltu3LL79k3759bN26lU2bNrFr1y7V+kaNGrFq1SpOnDjB4sWLWbx4MefOnSu2rs2bN7NlyxY+++wz9uzZw+3btwkJCUGn03Hy5Emg8Bfmnj17yozrrp49e1JQUMDp06f59ddfSUxMxMfHR1VGq9XSp08fDh8+rFo+depUPv3001ITMxSe3PzjH/+gX79+nDx5ksGDB5fYlnsdO3aMHTt2EBUVVaTOI0eO0LVrVx566KES9+vq6sratWv54YcfeOONN5g5cyapqanm9adPn8bV1ZVvvvmG9957j/nz5+Pu7s7Jkyc5fvx4kfpu3bpFTEwMrVu3Ji8vjwkTJuDl5cWRI0eYM2cOAQEB/Prrr0W2O3/+PP/85z8JCQnh6NGjDBkyhIkTJ2I0GouU7d+/PwcOHDBfiRUUFLBr1y4GDBhAdnY2aWlpPPnkk+byTz75JP/73/9KPAbVRRKDldavX8/06dNp0qQJOp2ON954g9jYWPMZ6N/+9jfs7e3R6XRMnjyZn376yXxmBYU/2B07dkSr1VK7dm2g8IzV2dkZBwcHevTowYULF0rcf0lld+7cyUsvvcQTTzxB3bp1mTx5col1pKamcvDgQebPn0+DBg2wtbWlU6dOFh+DP7ahf//+bNu2DSgcRz548CD9+/e36HjdKyYmhpdffpmnnnoKnU7Hm2++yalTp/j999/NZcaNG0f9+vVxcXHB09OzxDO5jRs3MnXqVB599FE0Gg1PPvkkDRs2LLNtO3fuZOTIkej1ehwcHPjHP/6hWv/888/zyCOPoNFo6NSpE15eXsX+Urrbntdeew1XV1ceeugh3nzzTXbs2GHVjWxbW1saNmxIdnY2WVlZADRu3LhIOScnJ/P6u1q3bk3nzp2L3GuwhCVtmTx5MnZ2dsUm4GvXruHk5FTqPvr164ezszNarZYXXniB5s2bc/r0afP6xo0bM3z4cGrVqlVqkv/3v/+NwWCgT58+3Lp1i3fffZcff/yR27dvM378eHQ6Hc899xw9evRg+/btRbbfsGEDQ4YM4emnn8bGxoZBgwZha2vLqVOnipRt2rQpbdq0MSf37777jjp16uDu7s7t27cBqFevnrl8vXr1uHXrVqnHoTqUf8BQqCQmJjJp0iS02v/LsVqtloyMDB5++GGWLVvGrl27yMzMNJfJysoydw69Xl+kznt/YOrWras6S7K0bGpqKm3btjWvK24/dyUnJ9OgQQMaNGhQVnOL9ce6fX198ff3Z/78+cTFxdGmTRuaNm0KlH68nJ2dVfWkpqby1FNPmT8/9NBDODg4kJKSQrNmzYCi7S/phyw5OZlHHnmk3G1LTU1Vtc/FxUW1/sCBA4SHh3P58mVMJhM5OTm0bNmyxLruHgco/CWSn59fbNstlZeXR2ZmJg0aNDAnutTUVFxdXVXl0tLSik2EU6ZMYfDgwYwaNapc+y2tLXc1adKkxO0dHBxIS0srdR
9bt25lzZo1JCQkAHD79m1Vciut/nuNHj2a6dOnq5adPXuWJk2aqPqhi4uLaqjyrsTERLZu3coXX3xhXpaXl0dqaipfffUVwcHBAHTs2JHVq1czYMAAtm3bxsCBA9m2bRsDBgwAwM7ODig8Wbp7Enjz5s1Sr5qqiyQGKzVp0oR33nnHPAx0r61bt7J3717WrFlDs2bNuHHjBs8880yJ4+CVqXHjxqpOnpSUVGLZJk2akJ2dzfXr16lfv75qXd26dcnJyTF/LuuHGeDxxx/HxcWFgwcPqn4w7u6rpONVXBvu/lKAwl8M165dq9Av0SZNmnDlypUiv7Tv/rDm5ORgb28PqNvo5OSkOnb3/t9oNDJlyhRCQ0Pp2bMntra2TJw4scTv94/tSUxMpFatWjRq1Kjc7blr79692NjY0L59exwcHGjSpAm7du1i3Lhx5jImk4ndu3fj7e1dZPvHHnuMPn36EBERUa79ltaW5ORkADQaTYnbd+7cmeXLl3P79m3zd3CvhIQE5syZwyeffIKHhwc2Njb4+fmpyvyx/tL2V1z8ycnJmEwmc3JISkrCzc2tSFm9Xs+ECRN4/fXXi63rxRdfVH3u168foaGhJCcnExcXx4YNGwBo0KABTk5O/PTTT3h5eQHw008/8fjjj1scd1WRoSQrvfLKKyxfvtz8Q5KZmWm+jLx16xY6nY6GDRty584dwsLCqiwuHx8fNm/ezKVLl7hz5w4fffRRiWUbN25Mt27dmD9/PtnZ2eTl5XHs2DGgcAz04sWLXLhwgdzcXFauXGnR/gcMGMCnn37KsWPHVGPepR2v4urYvHkzFy5cwGg0EhYWRvv27c1XC+UxePBgPvjgAy5fvoyiKPz0009kZWXh6OiIs7Mz0dHRFBQU8N///perV6+at+vXrx+ff/45ycnJZGdnExkZaV5nNBoxGo04OjpSq1YtDhw4wDfffFPmMbl69Sq3bt1i2bJl9OvXr0JP+ly7do2vvvqKkJAQxo0bR8OGDdFoNAQGBvLxxx8TExNDbm4uaWlpvPXWW2RlZTFs2LBi65o0aRKbNm1SDXGWxdq2+Pn50aRJEyZPnsylS5cwmUxkZWURERHBgQMHuHPnDhqNBkdHR6DwJv+9D24Up1GjRqSkpBQ79v9H7du3p06dOqxevZq8vDyOHj3K119/zQsvvFCk7ODBg1m/fj0//vgjiqJw+/Zt9u/fX+ITXY6OjnTq1InZs2fTrFkzHnvsMfO6gQMH8vHHH5Odnc2lS5fYuHEjgwYNKjPeqiZXDFYaMWIEiqIwevRoUlNTadSoES+88AK9evVi4MCBHD58mK5du+Lg4MDUqVNZt25dlcTVvXt3hg8fzogRI9BoNEycOJGtW7ei0+mKLb9kyRIWL15Mv379yMvLw9PTk2eeeYYWLVowadIkXnvtNerUqcObb75pPgMqzYABAwgLC6Nbt27mH24o/Xj9UefOnZk6dSqTJ0/m+vXreHh4VPiPjkaNGoXRaGT06NFkZWXx6KOPEh4eDsCCBQuYP38+y5Yt429/+xseHh7m7f7+979z+fJl/Pz8eOihhxgzZoz5xrq9vT1z5sxh2rRpGI1GevToUexZ+V0vv/wyKSkpDBs2jNzcXLp06cLbb79drnb4+fmh0WiwtbWlVatWzJ4923xjH+CFF15Ap9Px8ccfM2fOHPPQ1ueff17svQcovMnr5+dXrr5pbVt0Oh2ffPIJK1asYPTo0Vy/fp1GjRrRs2dP2rdvT8OGDRk9ejT+/v5oNBoGDhxIhw4dSq3z2Wef5fHHH6dLly5oNJpSH7jQ6XREREQwf/58Vq1ahbOzM0uWLFH9Er+rXbt2LFiwgJCQEOLj46lTpw4dOnQo9smnuwYMGEBgYCAzZ85ULZ8yZQrBwcH06NGDOnXqMG7cONWThjWFRibq+Wu4dOkSAwYM4MyZMxU6QxV/TocPH2bGjBl88skntG7durrDEX8SMpT0AIuLi8NoNJKdnc17771Hjx49JCn8xXTp0oXFixcX+wSNECWRK4YH2JgxYzh16hQ2NjY888wzBAcHlzicIIQQd0liEEIIoSJDSUIIIVQkMQghhFCRxCCEEELlgXhEJSvrFiZT+W+VNGpkT0aGZa8drg4Sn3UkPuvV9BglvorRajU0bFjyqzgeiMRgMikVSgx3t63JJD7rSHzWq+kxSnyVT4aShBBCqEhiEEIIofJADCUJIaqPoihkZaVhNOYAVTtskpqqxWQyVek+y6N649Og09WhYUOncr15FixIDFlZWcyaNYsrV66g0+lo3rw5ISEhODo6MmPGDI4ePUpaWhonTpxQvVf81KlTzJ07l9zcXJo2bcp7771X7OuF79y5w+zZszl37hw2NjYEBgbSo0ePcjVCCFF9bt7MRqPR4OzcDI2magchatXSkp9fcxNDdcanKCauXUvn5s1s6tVzKNe2ZX6LGo2GsWPHEhsbS0xMDK6urixduhQonJ0sOjq6yDYmk4mZM2cyd+5cYmNjMRgM5m3+KCoqCnt7e+Li4oiIiGDOnDk1ckYjIUTx7ty5Sb16DlWeFETpNBot9eo15M6d8j8VVeY36eDggKenp/mzu7s7iYmJADz33HPFXgWcPXuW2rVrm19L6+/vX2Su3Lt27tzJkCFDAHBzc6Nt27YcPHiw3A0RQlQPk6kAGxsZla6JbGxqYTIVlHu7cqV4k8nEunXrSn3nPBTOhHTvFIiOjo6YTKZiJx1PTExUTRGo1+vNM0AJIf4cyjuGLapGRb+XcqX5BQsWYGdnV+JMUNWlUSP7Cm/r5FSv7ELVSOKzjsRnvbJiTE3VUqtW9Q0jVee+yxISEkzr1q0ZPNi/2mLQarXl7mcWJ4bQ0FDi4+OJiIhQTaBdHL1ebx5ugsLpG7VaLQ4ODkXKuri4kJCQYJ7lKykpSTV0ZYmMjJsV+iMSJ6d6pKVZPp1hVZP4rCPxWc+SGE0mU7XdYC3r5m5+fn6VzUFS0r5MJqVab5CbTKYi36FWqyn1hNqiIxYWFsbZs2eJjIwscWrIe7Vt25acnByOHz+OwWBg/fr1qnl/7+Xj48OGDRto164dly9f5syZM7z//vuWhCWEEEV06WJg1KhxfPvtN3h6PsfQocNZuXIZly5dxGg04uFhYPLk6SQkXOWf/5zFF198SX5+Pv3792TkyDEMHTqCvXvjOHRoP/PmLWLdui/Yu3c3BQX56HS1CQgI4oknWhW7r4EDX2bhwmAyMtJp0kSPjc3/nURHR2/myy//g62tDkUxERLyLs2bu1XLMSpLmYnh4sWLrFq1Cjc3N/z9Cy+HmjVrRnh4OG+88QanT58GCn/Bt2zZkqioKLRaLUuWLCE4OFj1uOpdfn5+REZG4uzszJgxYwgKCqJ3795otVpCQkKwt6/40JAQovp8cyaJw6eT7kvdXdrr8Wqnt6hs7dq1Wb36MwDefXcB7u4dCAp6G5PJxPz5c9i+/StefHEQt2/fIj09neTkRFq0eIzjx48xdOgIfvjhewyGZwDw8enPK68UDp8fO3aU995bTGTkJ8Xu6623ZvL00
x6MHj2ehITfGTVqKJ06PQfARx99wNq1m3j44YcxGo01+u8vykwMTzzxBD///HOx6z788MMSt+vQoQMxMTHFrrv3EVc7OztWrFhRVhhCCGGxfv0GmP9/+PBBLlw4x/r1awHIycmhcWNnADp0MPDDD9+TlJSIn99LrF37GXl5eRw//j3Dhr0GwM8/X+Dzz9dw/Xo2Wq2Wq1evlLivEyd+YNq0mQA0bdoMg6GTeV2HDs+waFEwXl5dee65LjRt2uy+tL0yyDNmQohK49XO8rP6+6luXbt7Pim8887SYn8Rd+z4DD/8cIzExATmzl3AqVMn2LMnFkUBF5em5OXl8fbbgXz44b9o1epJ0tPTGDiwXyn7Ktk777zHhQvn+OGH40yZMoGAgNk895yXNc28b2ru7XwhhKgEXl7d+OKLTykoKHye/9q1ayQmJgCFieHo0W+5ceMGjRs7YzB0IipqlXkYyWjMpaCgwHyFsXnzxlL31bGjge3bvwIgMTGB48e/BwpvTCcmJtCmTVuGD3+NTp2e5eLF4kdiagK5YhBCPNCmTp3BRx+t4LXXXkGj0WBrq2PKlBm4uDSlcWNn7OzsaN/eHShMFCkpyXToUPjHuQ89ZM+YMf9g3LgR1K/fgB49epaxrwAWLgxmz55Y9HoXPDw6AoVPBi1aNI+bN2+g0WhxdnZmwoQ37mu7raFRFOXP97LwP5DHVauHxGedmh4fWBZjcnI8TZo0r6KI1ORdSWUr7vsp63FVGUoSQgihIolBCCGEiiQGIYQQKpIYhBBCqEhiEEIIoSKJQQghhIokBiGEECqSGIQQ4k9k0aJ5bNq04b7uQxKDEEKUQ35+/gO5r3vJKzGEEJUm75dvyPv5/szZbtuqG7Yty37pXJcuBsaNe51Dhw6QnZ1NYOBbHD/+PUePHiE/P58FC0Jxc2tBRkY68+a9xa1btzAajXTu7MXEiVNLrLMiczz07duLkSNHV+ocD/dOlHa/5niQxCCEeODY29dj9erP+PrrPcyePYN5895hwoQ3WLv2Uz777N/MnbsAe/t6hIYuw87Ojvz8fN588w2+++4Izz7budg6KzLHw6OPPlrpczy89tpQPD3v7xwPkhiEEJXGtqWXRWf191vPnn0AaNXqSUCDl1fX//+5NQcO7AMKX2z30UcfcObMaUAhIyODixd/KTExVGSOh4EDX+bzzz+t5DkenjGvu19zPJSZGLKyspg1axZXrlxBp9PRvHlzQkJCcHR05NSpU8ydO1c1S1ujRo04ceIE8+fPN9eRkZGBk5MTW7ZsKVJ/UFAQR44coWHDhkDhTHCvv/56pTROCPHXdHcKYq1Wi05na16u1WrNr9/esGEtN25cJzLyE2rXrk1o6CKMxtwS66zIHA8hIYs4ceKHP90cD2XefNZoNIwdO5bY2FhiYmJwdXVl6dKlmEwmZs6cydy5c4mNjcVgMLB06VKgcPa26Oho87/27dszYMCAEvcxfvx4c1lJCkKIqnDjxg0aNXqY2rVrk5aWyuHDByzetnrneDgG3N85HspMDA4ODnh6epo/u7u7k5iYyNmzZ6lduzYGQ+F7y/39/dm1a1eR7TMyMvjmm2/w8/OrlICFEKIyDB7sz5kzPzJ8+N9ZvHgBHTs+U/ZG/9/UqTOwsdHy2muvMGLEEGbMmExaWhpAueZ4GD16GHXr1i1jXwGcPPkDw4YNZtmyJUXmeBgxYggjR75CRkY6fn4vVeBIFFWu+RhMJhOjR4/G29sbZ2dnNm3aRGRkpHn9008/zYEDB3BwcDAvi4qK4ocffuCjjz4qts6goCCOHTuGnZ0drq6uzJgxg8cee6ziLRJCVKlz587j4lI98zGIsiUmxvPUU23KtU25bj4vWLAAOzs7hg0bRlxcnEXbbN68mTfffLPE9dOnT8fJyQmtVsvWrVsZO3Yse/bswcbGxuK4ZKKe6iHxWaemxweWxWgymaptMpqaMBFOaWpCfCaTqch3WGkT9YSGhhIfH8/y5cvRarXo9XoSExPN6zMzM9FqtaqrhVOnTpGdnU337t1LrNfZ2dn8XO7AgQO5ffs2ycnJloYlhBCiklmUGMLCwjh79izh4eHmu/1t27YlJyeH48ePA7B+/Xp8fHxU223atIkXX3yRWrVKvjBJSUkx///QoUNotYXzoQoh/jwegBmCH0gV/V7KHEq6ePEiq1atws3NDX9/fwCaNWtGeHg4S5YsITg4WPW46l05OTns2LGDL7/8skidfn5+REZG4uzsTGBgIBkZGWg0Guzt7fn4449LTSRCiJpFq7WhoCCfWrVsyy4sqlRBQT5areXD8neV6+ZzTSX3GKqHxGedmh4fWBbjjRvXyM/Pw8GhERpN1b5+rSaM4ZemOuNTFBPXrqVTq5aOevUcVOvKuscgp+ZCCKvY2zcgKyuNlJTfgao9z9RqtZX2Goj7oXrj06DT1cHevkG5t5TEIISwikajwdGxcbXsu6ZfddX0+Eoir90WQgihIolBCCGEiiQGIYQQKpIYhBBCqEhiEEIIoSKJQQghhIokBiGEECqSGIQQQqhIYhBCCKEiiUEIIYSKJAYhhBAqkhiEEEKoSGIQQgihIolBCCGESpmv3c7KymLWrFlcuXIFnU5H8+bNCQkJwdHRkVOnTjF37lzVDG6NGjUCoFWrVrRs2dI8n/OSJUto1apVkfrT09OZNWsWCQkJ1K5dmwULFvD0009XcjOFEEJYqswrBo1Gw9ixY4mNjSUmJgZXV1eWLl2KyWRi5syZzJ07l9jYWAwGA0uXLlVtu379eqKjo4mOji42KQC8//77GAwGYmNjmTt3LjNnzpT5Y4UQohqVmRgcHBzw9PQ0f3Z3dycxMZGzZ89Su3ZtDAYDAP7+/uzatavcAezatcs8l7TBYECn03HmzJly1yOEEKJylGsGN5PJxLp16/D29iYpKQkXFxfzOkdHR0wmE9euXcPBwQGA4cOHU1BQQLdu3Zg8eTI6nU5VX1ZWFoqi4OjoaF6m1+tJTk6mffv2FsdV2tylZXFyqlfhbauCxGcdic96NT1Gia/ylSsxLFiwADs7O4YNG0ZcXFypZffv349er+fmzZvMnDmT8PBwpk+fblWwJcnIuInJVP7hp5o+7Z7EZx2Jz3o1PUaJr2K0Wk2pJ9QWP5UUGhpKfHw8y5cvR6vVotfrSUxMNK/PzMxEq9Warxb0ej0A9vb2DB48mBMnThSps2HDhuZt70pKSqJJkyaWhiWEEKKSWZQYwsLCOHv2LOHh4ebhoLZt25KTk8Px48eBwhvNPj4+AGRnZ5OTkwNAfn4+sbGxtG7duti6fXx8WL9+PQDHjx8nJyeHtm3bWtcqIYQQFVbmUNLFixdZtWoVbm5u5pvEzZo1Izw8nCVLlhAcHKx6XBXg119/Ze7cuWg0GvLz8/Hw8GDq1KkApKSkMH78eKKjowGYMWMGM2fOZOvWrdSuXZslS5aYH3EVQghR9TTKA/BsqNxjqB4Sn3VqenxQ82OU+Cqm0u4xCCGE+GuQxCCEEEJFEoMQQggVSQxCCCFUJDEIIYRQkcQghBBCRRKD
EEIIFUkMQgghVCQxCCGEUJHEIIQQQkUSgxBCCBVJDEIIIVQkMQghhFCRxCCEEEJFEoMQQgiVMifqycrKYtasWVy5cgWdTkfz5s0JCQnB0dGRU6dOMXfuXNVEPY0aNeK3335j7ty5pKWlUatWLdq1a0dwcDB16tQpUv/w4cNJTEzE3r7w3eAjRozg5ZdfrvyWCiGEsEiZVwwajYaxY8cSGxtLTEwMrq6uLF26FJPJxMyZM5k7dy6xsbEYDAaWLl0KgK2tLbNnz2bXrl189dVX3Llzh6ioqBL3MWfOHKKjo4mOjpakIIQQ1azMxODg4ICnp6f5s7u7O4mJiZw9e5batWtjMBgA8Pf3Z9euXUDh1J9t2rQp3IFWS/v27UlMTLwf8QshhKhk5brHYDKZWLduHd7e3iQlJeHi4mJe5+joiMlk4tq1a6ptcnJy2LRpE97e3iXWu2TJEnx9fQkICCAlJaV8LRBCCFGpyrzHcK8FCxZgZ2fHsGHDiIuLK7N8fn4+06dP59lnn6Vnz57FllmyZAl6vZ6CggJWrVrFtGnTWLduXXnCKnXu0rI4OdWr8LZVQeKzjsRnvZoeo8RX+SxODKGhocTHxxMREYFWq0Wv16uGhzIzM9FqtTg4OABQUFBAQEAADRo0YM6cOSXWq9frAbCxsWHEiBF8+OGHmEwmtFrLL2YyMm5iMikWl7+rpk7UfZfEZx2Jz3o1PUaJr2K0Wk2pJ9QW/fYNCwvj7NmzhIeHo9PpAGjbti05OTkcP34cgPXr1+Pj4wMUDjkFBQVhY2PDokWL0Gg0xdabn59Penq6+fP27dtp2bJluZKCEEKIylXmFcPFixdZtWoVbm5u+Pv7A4U3l8PDw1myZAnBwcGqx1UBDh48yFdffUXLli156aWXAOjQoQPBwcGkpKQwfvx4oqOjMRqNjB8/nry8PAAaN25MWFjY/WqrEEIIC2gURSn/GEwNI0NJ1UPis05Njw9qfowSX8VUylCSEEKIvw5JDEIIIVQkMQghhFCRxCCEEEJFEoMQQggVSQxCCCFUJDEIIYRQkcQghBBCRRKDEEIIFUkMQgghVCQxCCGEUJHEIIQQQkUSgxBCCBVJDEIIIVQkMQghhFCRxCCEEEKlzMSQlZXFuHHj6Nu3L76+vrzxxhtkZmYCcOrUKV588UX69u3L6NGjycjIMG9X2rp73blzh2nTptG7d298fHzYt29fJTVNCCFERZSZGDQaDWPHjiU2NpaYmBhcXV1ZunQpJpOJmTNnMnfuXGJjYzEYDCxduhSg1HV/FBUVhb29PXFxcURERDBnzhxu3bpVua0UQghhsTITg4ODA56enubP7u7uJCYmcvbsWWrXro3BYADA39+fXbt2AZS67o927tzJkCFDAHBzc6Nt27YcPHjQulYJIYSosFrlKWwymVi3bh3e3t4kJSXh4uJiXufo6IjJZOLatWulrnNwcFDVmZiYSNOmTc2f9Xo9ycnJFWyO5c59vZ1al79FqcBc0VXlZ61G4rOCxGe9mh7jXz2+gkc785R3/0qvt1yJYcGCBdjZ2TFs2DDi4uIqPZiKKm1S65LUrWNLHqDRaio/oEok8VlH4rNeTY/xrxxf3Tq2ODnVq/R6LU4MoaGhxMfHExERgVarRa/Xk5iYaF6fmZmJVqvFwcGh1HV/5OLiQkJCAo6OjgAkJSWphq4skZFxE1M5s/Kjnfvg5PcyaWk3yrVdVXJyqifxWUHis15Nj1Hio0L1a7WaUk+oLXpcNSwsjLNnzxIeHo5OpwOgbdu25OTkcPz4cQDWr1+Pj49Pmev+yMfHhw0bNgBw+fJlzpw5Q9euXS1snhBCiMpW5hXDxYsXWbVqFW5ubvj7+wPQrFkzwsPDWbJkCcHBweTm5tK0aVPee+89ALRabYnrAPz8/IiMjMTZ2ZkxY8YQFBRE79690Wq1hISEYG9f/qEhIYQQlUOjKErNvXNjoYoMJYFchlpL4rNOTY8Pan6MEl/FVMpQkhBCiL8OSQxCCCFUJDEIIYRQkcQghBBCRRKDEEIIFUkMQgghVCQxCCGEUJHEIIQQQkUSgxBCCBVJDEIIIVQkMQghhFCRxCCEEEJFEoMQQggVSQxCCCFUJDEIIYRQkcQghBBCxaI5n0NDQ4mNjSUhIYGYmBhatmwJwP79+/nggw/Iz8+nQYMGLF68GFdXV37//XcmTZpk3v7GjRvcvHmT77//vkjdK1eu5D//+Q+NGzcGoEOHDgQHB1dG24QQQlSARYmhZ8+ejBgxgldffdW8LDs7m8DAQNavX0+LFi2Ijo5m3rx5REVF0axZM6Kjo81lFy1aREFBQYn1Dxw4kMDAQCuaIYQQorJYNJRkMBjQ6/WqZfHx8Tz88MO0aNECgO7du3P48GEyMzNV5YxGIzExMbz88suVFLIQQoj7yaIrhuK0aNGC9PR0Tp8+Tfv27YmJiQEgKSkJR0dHc7mvv/4aZ2dnnnrqqRLr2r59O4cPH8bJyYnJkyfj4eFRrlhKm7u0LE5O9Sq8bVWQ+Kwj8Vmvpsco8VW+CieGevXqsWzZMhYvXkxubi7dunWjfv362NjYqMpt2rSp1KsFf39/JkyYgK2tLd988w0TJ05kx44dNGzY0OJYMjJuYjIp5W5DTZ2o+y6JzzoSn/VqeowSX8VotZpST6grnBgAOnfuTOfOnQFIT08nKiqKRx55xLw+JSWFY8eOsWTJkhLrcHJyMv/fy8sLvV7PxYsX6dSpkzWhCSGEqCCrHldNS0sDwGQyERYWhr+/P3Z2dub1W7ZsoXv37qWe/aekpJj/f+HCBRISEsz3LYQQQlQ9i64YFi5cyO7du0lPT2fUqFE4ODiwfft2li9fzokTJ8jLy8PLy4uAgADVdlu2bOGtt94qUt+4ceOYMmUK7dq1IywsjHPnzqHVarG1tWXJkiWqqwghhBBVS6MoSvkH52sYucdQPSQ+69T0+KDmxyjxVUxZ9xjkL5+FEEKoSGIQQgihIolBCCGEiiQGIYQQKpIYhBBCqEhiEEIIoSKJQQghhIokBiGEECqSGIQQQqhIYhBCCKEiiUEIIYSKJAYhhBAqkhiEEEKoSGIQQgihIolBCCGEikWJITQ0FG9vb1q1asUvv/xiXr5//34GDRqEr68vw4YN4+rVq+Z13t7e+Pj44Ofnh5+fH4cOHSq27jt37jBt2jR69+6Nj48P+/bts7JJQgghrGHRDG49e/ZkxIgRvPrqq+Zl2dnZBAYGsn79elq0aEF0dDTz5s0jKirKXGbFihW0bNmy1LqjoqKwt7cnLi6Oy5cv8+qrr7J7924eeuihCjZJCCGENSy6YjAYDOj1etWy+Ph4Hn74YfP8zN27d+fw4cNkZmaWK4CdO3cyZMgQANzc3Gjbti0HDx4sVx1CCCEqT4XvMbRo0YL09HROnz4NQExMDABJSUnmMgEBAfj6+jJv3jyuX79ebD2JiYk0bdrU/Fmv15OcnFzRsIQQQljJoqGk4tSrV49ly5axePFicnNz6datG/Xr18fGxgaAtWvXotfrMRqNLFq0iJCQEJYuXVppgd+rtLl
Ly+LkVK8SI6l8Ep91JD7r1fQYJb7KV+HEANC5c2c6d+4MQHp6OlFRUTzyyCMA5qEnnU7H0KFDef3114utw8XFhYSEBBwdHYHCKw5PT89yxZGRcROTSSl3/DV1ou67JD7rSHzWq+kxSnwVo9VqSj2htupx1bS0NABMJhNhYWH4+/tjZ2fH7du3uXGj8GAoisKOHTto3bp1sXX4+PiwYcMGAC5fvsyZM2fo2rWrNWEJIYSwgkVXDAsXLmT37t2kp6czatQoHBwc2L59O8uXL+fEiRPk5eXh5eVFQEAAABkZGUyePJmCggJMJhOPPfYYwcHB5vr8/PyIjIzE2dmZMWPGEBQURO/evdFqtYSEhGBvX/GhISGEENbRKIpS/jGYGkaGkqqHxGedmh4f1PwYJb6Kua9DSUIIIR48khiEEEKoSGIQQgihIolBCCGEiiQGIYQQKpIYhBBCqEhiEEIIoSKJQQghhIokBiGEECqSGIQQQqhIYhBCCKEiiUEIIYSKJAYhhBAqkhiEEEKoSGIQQgihIolBCCGEikUzuIWGhhIbG0tCQgIxMTG0bNkSgP379/PBBx+Qn59PgwYNWLx4Ma6urmRlZTFr1iyuXLmCTqejefPmhISEmOd1vldQUBBHjhyhYcOGQOFUnyXNDy2EEOL+s+iKoWfPnqxdu5amTZual2VnZxMYGEhYWBgxMTEMHjyYefPmAaDRaBg7diyxsbHExMTg6urK0qVLS6x//PjxREdHEx0dLUlBCCGqmUWJwWAwoNfrVcvi4+N5+OGHadGiBQDdu3fn8OHDZGZm4uDggKenp7msu7s7iYmJlRi2EEKI+6XC9xhatGhBeno6p0+fBiAmJgaApKQkVTmTycS6devw9vYusa41a9bg6+vLxIkTuXTpUkVDEkIIUQksusdQnHr16rFs2TIWL15Mbm4u3bp1o379+tjY2KjKLViwADs7O4YNG1ZsPdOnT8fJyQmtVsvWrVsZO3Yse/bsKVJPaUqb1LosTk71KrxtVZD4rCPxWa+mxyjxVT6NoiiKpYW9vb2JiIgw33y+V3p6Oj169ODo0aPY2dkBhTetf/75ZyIiItDpdBbtw9PTk82bN6vuZ5QlI+MmJpPFzTBzcqpHWtqNcm9XVSQ+60h81qvpMUp8FaPVako9obbqcdW0tDSgcLgoLCwMf39/c1IICwvj7NmzhIeHl5oUUlJSzP8/dOgQWq0WZ2dna8ISQghhBYuGkhYuXMju3btJT09n1KhRODg4sH37dpYvX86JEyfIy8vDy8uLgIAAAC5evMiqVatwc3PD398fgGbNmhEeHg6An58fkZGRODs7ExgYSEZGBhqNBnt7ez7++GNq1arwCJcQQggrlWsoqaaSoaTqIfFZp6bHBzU/RomvYu7rUJIQQogHjyQGIYQQKpIYhBBCqEhiEEIIoSKJQQghhIokBiGEECqSGIQQQqhIYhBCCKEiiUEIIYSKJAYhhBAqkhiEEEKoSGIQQgihIolBCCGEiiQGIYQQKpIYhBBCqEhiEEIIoVJmYggNDcXb25tWrVrxyy+/mJfv37+fQYMG4evry7Bhw7h69ap53W+//caQIUPo27cvQ4YM4fLly8XWXVBQwPz58+nVqxe9e/dm48aN1rdICCGEVcpMDD179mTt2rU0bdrUvCw7O5vAwEDCwsKIiYlh8ODBzJs3z7w+ODiYoUOHEhsby9ChQ5k7d26xdcfExHDlyhV2797Nhg0bWLlyJb///rv1rRJCCFFhZSYGg8GAXq9XLYuPj+fhhx+mRYsWAHTv3p3Dhw+TmZlJRkYG58+fZ8CAAQAMGDCA8+fPk5mZWaTuHTt2MHjwYLRaLY6OjvTq1Ytdu3ZVRruEEEJUUK2KbNSiRQvS09M5ffo07du3JyYmBoCkpCQURcHZ2RkbGxsAbGxsaNy4MUlJSTg6OqrqSUpKwsXFxfxZr9eTnJxc7nhKm7u0LE5O9Sq8bVWQ+Kwj8Vmvpsco8VW+CiWGevXqsWzZMhYvXkxubi7dunWjfv362NjYkJ+fX9kxlikj4yYmk1Lu7WrqRN13SXzWkfisV9NjlPgqRqvVlHpCXaHEANC5c2c6d+4MQHp6OlFRUTzyyCPcuXOHlJQUCgoKsLGxoaCggNTU1CLDUVB4hZCYmEj79u2BolcQQgghql6FH1dNS0sDwGQyERYWhr+/P3Z2djRq1IjWrVuzbds2ALZt20br1q2LDCMB+Pj4sHHjRkwmE5mZmezZs4e+fftWNCQhhBCVoMzEsHDhQrp160ZycjKjRo2if//+ACxfvpx+/frRp08fbG1tCQgIMG8zb948vvjiC/r27csXX3zB/PnzzevGjRvHmTNnAPDz86NZs2b06dOHv//970yaNAlXV9fKbqMQQohy0CiKUv7B+RpG7jFUD4nPOjU9Pqj5MUp8FVPWPQb5y2chhBAqkhiEEEKoSGIQQgihUuHHVWsSrVZTLdtWBYnPOhKf9Wp6jBJf+ZUV0wNx81kIIUTlkaEkIYQQKpIYhBBCqEhiEEIIoSKJQQghhIokBiGEECqSGIQQQqhIYhBCCKEiiUEIIYSKJAYhhBAqD8QrMUrz22+/ERQUxLVr13BwcCA0NBQ3NzdVmYKCAhYuXMihQ4fQaDSMHz+ewYMHV0l8WVlZzJo1iytXrqDT6WjevDkhISFFJjYKCgriyJEjNGzYECic5Oj111+vkhi9vb3R6XTUrl0bgICAALp27aoqc+fOHWbPns25c+ewsbEhMDCQHj163PfYfv/9dyZNmmT+fOPGDW7evMn333+vKrdy5Ur+85//0LhxYwA6dOhAcHDwfYkpNDSU2NhYEhISiImJoWXLloBlfRHuf38sLj5L+yHc/75Y0vGzpB/C/e+LxcVnaT+Equ2LFaY84IYPH65s3bpVURRF2bp1qzJ8+PAiZbZs2aKMHj1aKSgoUDIyMpSuXbsqV69erZL4srKylO+++878+d1331Vmz55dpFxgYKDy+eefV0lMf9SjRw/l559/LrXMypUrlbfeektRFEX57bfflM6dOys3b96sivBUFi5cqMyfP7/I8hUrVijvvvtulcRw7NgxJTExschxs6QvKsr974/FxWdpP1SU+98XSzp+lvRDRbn/fbGk+O5VUj9UlKrtixX1QA8lZWRkcP78eQYMGADAgAEDOH/+PJmZmapyO3bsYPDgwWi1WhwdHenVqxe7du2qkhgdHBzw9PQ0f3Z3dycxMbFK9l2Zdu7cyZAhQwBwc3Ojbdu2HDx4sEpjMBqNxMTE8PLLL1fpfv/IYDAUmePc0r4I978/FhdfTeqHxcVXHve7L5YVX03ph9Z4oBNDUlISzs7O2NjYAGBjY0Pjxo1JSkoqUs7FxcX8Wa/Xk5ycXKWxQuH82evWrcPb27vY9WvWrMHX15eJEydy6dKlKo0tICAAX19f5s2bx/Xr14usT0xMpGnTpubP1XEMv/76a5ydnXnqqaeKXb99+3Z8fX0ZPXo0J0+erNLYLO2Ld8tWZ38sqx9C9fXFsvohVH9fLKsfQvX2RUs80Inhz2bBgg
XY2dkxbNiwIuumT59OXFwcMTEx9OnTh7Fjx1JQUFAlca1du5avvvqKTZs2oSgKISEhVbLf8tq0aVOJZ2n+/v7s3buXmJgYxowZw8SJE8nKyqriCP8cSuuHUH198UHoh/Dn6IsPdGLQ6/WkpKSYO21BQQGpqalFLgP1er3qsjkpKYkmTZpUaayhoaHEx8ezfPlytNqiX4uzs7N5+cCBA7l9+3aVnQXdPV46nY6hQ4dy4sSJImVcXFxISEgwf67qY5iSksKxY8fw9fUtdr2TkxO2trYAeHl5odfruXjxYpXFZ2lfvFu2uvpjWf0Qqq8vWtIPoXr7Yln9EKq/L1rigU4MjRo1onXr1mzbtg2Abdu20bp16yJPWvj4+LBx40ZMJhOZmZns2bOHvn37VlmcYWFhnD17lvDwcHQ6XbFlUlJSzP8/dOgQWq0WZ2fn+x7b7du3uXGjcDJzRVHYsWMHrVu3LlLOx8eHDRs2AHD58mXOnDlT7BMj98uWLVvo3r27+UmZP7r3+F24cIGEhARatGhRVeFZ3Beh+vqjJf0QqqcvWtoPoXr7Yln9EKq/L1rigZ+o59KlSwQFBXH9+nXq169PaGgojz76KOPGjWPKlCm0a9eOgoICQkJC+OabbwAYN26c+ebV/Xbx4kUGDBiAm5sbderUAaBZs2aEh4fj5+dHZGQkzs7OvPbaa2RkZKDRaLC3t2fWrFm4u7vf9/iuXr3K5MmTKSgowGQy8dhjjzFnzhwaN26siu/27dsEBQVx4cIFtFotM2fOpFevXvc9vrv69u3LW2+9Rbdu3czL7v2OAwMDOXfuHFqtFltbW6ZMmUL37t3vSywLFy5k9+7dpKen07BhQxwcHNi+fXuJffGPsd7v/lhcfMuXLy+xHwJV2heLiy8iIqLEfvjH+O53Xyzp+4Xi+yFUX1+sqAc+MQghhCifB3ooSQghRPlJYhBCCKEiiUEIIYSKJAYhhBAqkhiEEEKoSGIQohJERETw1ltvVWjboKAgli1bVskRCVFxD/xrt4WoChMmTKjuEISoNHLFIIQQQkUSg/hLSklJYfLkyTz77LN4e3vz2WefAYWTqEyZMoVp06bh4eHBoEGD+Omnn8zbRUZG0rVrVzw8POjbty/ffvutebuAgABzub1799K/f38MBgPDhw9XvYH0/PnzDBo0CA8PD6ZNm0Zubq4qtn379uHn54fBYMDf39+i/QtRqapvKgghqkdBQYEyaNAgZeXKlUpubq5y5coVxdvbWzl48KCyYsUKpU2bNsrOnTsVo9GorF69WunRo4diNBqVS5cuKd26dVOSk5MVRVGUq1evKvHx8YqiFE6+MmPGDEVRFOXXX39Vnn76aeXw4cOK0WhUIiMjlV69eim5ublKbm6u8vzzzytr1qxRjEajsnPnTqVNmzZKWFiYoiiKcu7cOeXZZ59VTp06peTn5yubN29WevTooeTm5pa6fyEqk1wxiL+cM2fOkJmZyRtvvIFOp8PV1ZW///3v7NixA4CnnnoKHx8fbG1tGTVqFEajkR9//BEbGxuMRiOXLl0iLy+PZs2a8cgjjxSpf8eOHXTv3h0vLy9sbW0ZM2YMOTk5nDx5kh9//JG8vDxGjhyJra0tPj4+tGvXzrzthg0bGDJkCE8//TQ2NjYMGjQIW1tbTp06ZfH+hbCW3HwWfzkJCQmkpqZiMBjMywoKCjAYDLi4uKhe0Xz3zaF3y//zn/9k5cqV/O9//6NLly4EBQUVebNoamqqaqIdrVZrfu22jY0Nzs7OaDQa8/p7yyYmJrJ161a++OIL87K8vDxSU1Pp1KmTRfsXwlpyxSD+cvR6Pc2aNeP48ePmfydPnuRf//oXgGpuAZPJREpKivktnr6+vqxbt459+/ah0WhYunRpkfobN26smk9BURTzDG5OTk6kpKSg3PPuynvL6vV6JkyYoIrtxx9/NE8Jasn+hbCWJAbxl9O+fXseeughIiMjycnJoaCggF9++YXTp08DcO7cOXbv3k1+fj6ffvopOp2Op59+ml9//ZVvv/0Wo9GITqejdu3axU5m069fPw4cOMC3335LXl4e//73v9HpdHh4eODu7k6tWrX47LPPyMvLY/fu3Zw5c8a87eDBg1m/fj0//vgjiqJw+/Zt9u/fz82bNy3evxDWkqEk8ZdjY2NDREQEoaGh9OzZE6PRSIsWLZg2bRoAPXv2ZMeOHQQGBtK8eXNWrlyJra0tRqOR999/n0uXLmFra4uHh0ex00s++uijvPfeeyxYsICUlBRat25NRESEefKblStX8vbbb7N8+XK6d+9O7969zdu2a9eOBQsWEBISQnx8PHXq1KFDhw4YDAaL9y+EtWQ+BiHusXLlSuLj42WIRvylyXWoEEIIFUkMQgghVGQoSQghhIpcMQghhFCRxCCEEEJFEoMQQggVSQxCCCFUJDEIIYRQkcQghBBC5f8BF8PqxKKrn84AAAAASUVORK5CYII=", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "def eval(cfg,env,agent):\n", - " print('开始测试!')\n", - " print(f'环境:{cfg.env}, 算法:{cfg.algo}, 设备:{cfg.device}')\n", - " # 由于测试不需要使用epsilon-greedy策略,所以相应的值设置为0\n", - " cfg.epsilon_start = 0.0 # e-greedy策略中初始epsilon\n", - " cfg.epsilon_end = 0.0 # e-greedy策略中的终止epsilon\n", - " rewards = [] # 记录所有回合的奖励\n", - " ma_rewards = [] # 记录所有回合的滑动平均奖励\n", - " for i_ep in range(cfg.test_eps):\n", - " ep_reward = 0 # 记录一回合内的奖励\n", - " state = env.reset() # 重置环境,返回初始状态\n", - " while True:\n", - " action = agent.choose_action(state) # 选择动作\n", - " next_state, reward, done, _ = env.step(action) # 更新环境,返回transition\n", - " state = next_state # 更新下一个状态\n", - " ep_reward += reward # 累加奖励\n", - " if done:\n", - " break\n", - " rewards.append(ep_reward)\n", - " if ma_rewards:\n", - " ma_rewards.append(ma_rewards[-1]*0.9+ep_reward*0.1)\n", - " else:\n", - " ma_rewards.append(ep_reward)\n", - " if (i_ep+1)%3 == 0: \n", - " print(f\"回合:{i_ep+1}/{cfg.test_eps}, 奖励:{ep_reward:.1f}\")\n", - " print('完成测试!')\n", - " return rewards,ma_rewards\n", - "\n", - "rewards,ma_rewards = eval(cfg,env,agent)\n", - "plot_rewards(rewards,ma_rewards, plot_cfg) # 画出结果\n" - ] - } - ], - "metadata": { - "interpreter": { - "hash": "fe38df673a99c62a9fea33a7aceda74c9b65b12ee9d076c5851d98b692a4989a" - }, - "kernelspec": { - "display_name": "Python 3.7.10 64-bit ('py37': conda)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.9" - }, - "metadata": { - "interpreter": { - "hash": "366e1054dee9d4501b0eb8f87335afd3c67fc62db6ee611bbc7f8f5a1fefe232" - } - }, - "orig_nbformat": 2 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/codes/DQN/train.py b/codes/DQN/train.py deleted file mode 100644 index 54fe1d8..0000000 --- a/codes/DQN/train.py +++ /dev/null @@ -1,138 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -''' -@Author: John -@Email: johnjim0816@gmail.com -@Date: 2020-06-12 00:48:57 -@LastEditor: John -LastEditTime: 2021-12-22 11:08:04 -@Discription: -@Environment: python 3.7.7 -''' -def train(cfg, env, agent): - ''' 训练 - ''' - print('开始训练!') - print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}') - rewards = [] # 记录所有回合的奖励 - ma_rewards = [] # 记录所有回合的滑动平均奖励 - for i_ep in range(cfg.train_eps): - ep_reward = 0 # 记录一回合内的奖励 - state = env.reset() # 重置环境,返回初始状态 - while True: - action = agent.choose_action(state) # 选择动作 - next_state, reward, done, _ = env.step(action) # 更新环境,返回transition - agent.memory.push(state, action, reward, next_state, done) # 保存transition - state = next_state # 更新下一个状态 - agent.update() # 更新智能体 - ep_reward += reward # 累加奖励 - if done: - break - if (i_ep+1) % cfg.target_update == 0: # 智能体目标网络更新 - agent.target_net.load_state_dict(agent.policy_net.state_dict()) - rewards.append(ep_reward) - if ma_rewards: - ma_rewards.append(0.9*ma_rewards[-1]+0.1*ep_reward) - else: - ma_rewards.append(ep_reward) - if (i_ep+1)%10 == 0: - print('回合:{}/{}, 奖励:{}'.format(i_ep+1, cfg.train_eps, ep_reward)) - print('完成训练!') - return rewards, ma_rewards - -def test(cfg,env,agent): - print('开始测试!') - print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}') - # 由于测试不需要使用epsilon-greedy策略,所以相应的值设置为0 - cfg.epsilon_start = 0.0 # e-greedy策略中初始epsilon - cfg.epsilon_end = 0.0 # e-greedy策略中的终止epsilon - 
rewards = [] # 记录所有回合的奖励 - ma_rewards = [] # 记录所有回合的滑动平均奖励 - for i_ep in range(cfg.test_eps): - ep_reward = 0 # 记录一回合内的奖励 - state = env.reset() # 重置环境,返回初始状态 - while True: - action = agent.choose_action(state) # 选择动作 - next_state, reward, done, _ = env.step(action) # 更新环境,返回transition - state = next_state # 更新下一个状态 - ep_reward += reward # 累加奖励 - if done: - break - rewards.append(ep_reward) - if ma_rewards: - ma_rewards.append(ma_rewards[-1]*0.9+ep_reward*0.1) - else: - ma_rewards.append(ep_reward) - print(f"回合:{i_ep+1}/{cfg.test_eps},奖励:{ep_reward:.1f}") - print('完成测试!') - return rewards,ma_rewards - -if __name__ == "__main__": - import sys,os - curr_path = os.path.dirname(os.path.abspath(__file__)) # 当前文件所在绝对路径 - parent_path = os.path.dirname(curr_path) # 父路径 - sys.path.append(parent_path) # 添加路径到系统路径 - - import gym - import torch - import datetime - - from common.utils import save_results, make_dir - from common.utils import plot_rewards - from DQN.agent import DQN - from DQN.train import train - - curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # 获取当前时间 - class DQNConfig: - def __init__(self): - self.algo = "DQN" # 算法名称 - self.env_name = 'CartPole-v0' # 环境名称 - self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # 检测GPU - self.train_eps = 200 # 训练的回合数 - self.test_eps = 30 # 测试的回合数 - # 超参数 - self.gamma = 0.95 # 强化学习中的折扣因子 - self.epsilon_start = 0.90 # e-greedy策略中初始epsilon - self.epsilon_end = 0.01 # e-greedy策略中的终止epsilon - self.epsilon_decay = 500 # e-greedy策略中epsilon的衰减率 - self.lr = 0.0001 # 学习率 - self.memory_capacity = 100000 # 经验回放的容量 - self.batch_size = 64 # mini-batch SGD中的批量大小 - self.target_update = 4 # 目标网络的更新频率 - self.hidden_dim = 256 # 网络隐藏层 - class PlotConfig: - def __init__(self) -> None: - self.algo = "DQN" # 算法名称 - self.env_name = 'CartPole-v0' # 环境名称 - self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # 检测GPU - self.result_path = curr_path+"/outputs/" + self.env_name + \ - '/'+curr_time+'/results/' # 保存结果的路径 - self.model_path = curr_path+"/outputs/" + self.env_name + \ - '/'+curr_time+'/models/' # 保存模型的路径 - self.save = True # 是否保存图片 - - def env_agent_config(cfg,seed=1): - ''' 创建环境和智能体 - ''' - env = gym.make(cfg.env_name) # 创建环境 - env.seed(seed) # 设置随机种子 - state_dim = env.observation_space.shape[0] # 状态数 - action_dim = env.action_space.n # 动作数 - agent = DQN(state_dim,action_dim,cfg) # 创建智能体 - return env,agent - - cfg = DQNConfig() - plot_cfg = PlotConfig() - # 训练 - env,agent = env_agent_config(cfg,seed=1) - rewards, ma_rewards = train(cfg, env, agent) - make_dir(plot_cfg.result_path, plot_cfg.model_path) # 创建保存结果和模型路径的文件夹 - agent.save(path=plot_cfg.model_path) # 保存模型 - save_results(rewards, ma_rewards, tag='train', path=plot_cfg.result_path) # 保存结果 - plot_rewards(rewards, ma_rewards, plot_cfg, tag="train") # 画出结果 - # 测试 - env,agent = env_agent_config(cfg,seed=10) - agent.load(path=plot_cfg.model_path) # 导入模型 - rewards,ma_rewards = test(cfg,env,agent) - save_results(rewards,ma_rewards,tag='test',path=plot_cfg.result_path) # 保存结果 - plot_rewards(rewards,ma_rewards, plot_cfg, tag="test") # 画出结果 \ No newline at end of file diff --git a/codes/Docs/使用DDPG解决倒立摆问题.md b/codes/Docs/使用DDPG解决倒立摆问题.md index da815dc..cfcf2a9 100644 --- a/codes/Docs/使用DDPG解决倒立摆问题.md +++ b/codes/Docs/使用DDPG解决倒立摆问题.md @@ -90,15 +90,15 @@ class OUNoise(object): self.max_sigma = max_sigma self.min_sigma = min_sigma self.decay_period = decay_period - self.action_dim = action_space.shape[0] + self.n_actions = action_space.shape[0] self.low = 
action_space.low self.high = action_space.high self.reset() def reset(self): - self.obs = np.ones(self.action_dim) * self.mu + self.obs = np.ones(self.n_actions) * self.mu def evolve_obs(self): x = self.obs - dx = self.theta * (self.mu - x) + self.sigma * np.random.randn(self.action_dim) + dx = self.theta * (self.mu - x) + self.sigma * np.random.randn(self.n_actions) self.obs = x + dx return self.obs def get_action(self, action, t=0): diff --git a/codes/Docs/使用DQN解决推车杆问题.md b/codes/Docs/使用DQN解决推车杆问题.md index ac56ac6..a09fec7 100644 --- a/codes/Docs/使用DQN解决推车杆问题.md +++ b/codes/Docs/使用DQN解决推车杆问题.md @@ -14,10 +14,10 @@ CartPole-v0是一个经典的入门环境,如下图,它通过向左(动作=0 import gym env = gym.make('CartPole-v0') # 建立环境 env.seed(1) # 随机种子 -state_dim = env.observation_space.shape[0] # 状态数 -action_dim = env.action_space.n # 动作数 +n_states = env.observation_space.shape[0] # 状态数 +n_actions = env.action_space.n # 动作数 state = env.reset() # 初始化环境 -print(f"状态数:{state_dim},动作数:{action_dim}") +print(f"状态数:{n_states},动作数:{n_actions}") print(f"初始状态:{state}") ``` @@ -157,7 +157,7 @@ def choose_action(self, state): q_values = self.policy_net(state) action = q_values.max(1)[1].item() # 选择Q值最大的动作 else: - action = random.randrange(self.action_dim) + action = random.randrange(self.n_actions) ``` 可以看到跟Q学习算法其实是一样的,都是用的$\epsilon-greedy$策略,只是使用神经网络的话我们需要通过Torch或者Tensorflow工具来处理相应的数据。 diff --git a/codes/Docs/使用Q-learning解决悬崖寻路问题.md b/codes/Docs/使用Q-learning解决悬崖寻路问题.md index ac25945..3480d2f 100644 --- a/codes/Docs/使用Q-learning解决悬崖寻路问题.md +++ b/codes/Docs/使用Q-learning解决悬崖寻路问题.md @@ -30,9 +30,9 @@ env = CliffWalkingWapper(env) # 装饰环境 这里我们在程序中使用了一个装饰器重新定义环境,但不影响对环境的理解,感兴趣的同学具体看相关代码。可以由于gym环境封装得比较好,所以我们想要使用这个环境只需要使用gym.make命令输入函数名即可,然后我们可以查看环境的状态和动作数目: ```python -state_dim = env.observation_space.n # 状态数 -action_dim = env.action_space.n # 动作数 -print(f"状态数:{state_dim},动作数:{action_dim}") +n_states = env.observation_space.n # 状态数 +n_actions = env.action_space.n # 动作数 +print(f"状态数:{n_states},动作数:{n_actions}") ``` 打印出来的结果如下: @@ -72,9 +72,9 @@ print(state) env = gym.make('CliffWalking-v0') # 定义环境 env = CliffWalkingWapper(env) # 装饰环境 env.seed(1) # 设置随机种子 -state_dim = env.observation_space.n # 状态数 -action_dim = env.action_space.n # 动作数 -agent = QLearning(state_dim,action_dim,cfg) # cfg存储算法相关参数 +n_states = env.observation_space.n # 状态数 +n_actions = env.action_space.n # 动作数 +agent = QLearning(n_states,n_actions,cfg) # cfg存储算法相关参数 for i_ep in range(cfg.train_eps): # cfg.train_eps表示最大训练的回合数 ep_reward = 0 # 记录每个回合的奖励 state = env.reset() # 重置环境 @@ -126,7 +126,7 @@ def choose_action(self, state): if np.random.uniform(0, 1) > self.epsilon: action = np.argmax(self.Q_table[str(state)]) # 选择Q(s,a)最大对应的动作 else: - action = np.random.choice(self.action_dim) # 随机选择动作 + action = np.random.choice(self.n_actions) # 随机选择动作 return action ``` diff --git a/codes/DuelingDQN/task0_train.ipynb b/codes/DuelingDQN/task0_train.ipynb index 7e38218..efa485f 100644 --- a/codes/DuelingDQN/task0_train.ipynb +++ b/codes/DuelingDQN/task0_train.ipynb @@ -136,12 +136,12 @@ "outputs": [], "source": [ "class DuelingNet(nn.Module):\n", - " def __init__(self, state_dim, action_dim,hidden_size=128):\n", + " def __init__(self, n_states, n_actions,hidden_size=128):\n", " super(DuelingNet, self).__init__()\n", " \n", " # 隐藏层\n", " self.hidden = nn.Sequential(\n", - " nn.Linear(state_dim, hidden_size),\n", + " nn.Linear(n_states, hidden_size),\n", " nn.ReLU()\n", " )\n", " \n", @@ -149,7 +149,7 @@ " self.advantage = nn.Sequential(\n", " nn.Linear(hidden_size, hidden_size),\n", " 
nn.ReLU(),\n", - " nn.Linear(hidden_size, action_dim)\n", + " nn.Linear(hidden_size, n_actions)\n", " )\n", " \n", " # 价值函数\n", @@ -192,7 +192,7 @@ ], "source": [ "class DuelingDQN:\n", - " def __init__(self,state_dim,action_dim,cfg) -> None:\n", + " def __init__(self,n_states,n_actions,cfg) -> None:\n", " self.batch_size = cfg.batch_size\n", " self.device = cfg.device\n", " self.loss_history = [] # 记录loss的变化\n", @@ -200,8 +200,8 @@ " self.epsilon = lambda frame_idx: cfg.epsilon_end + \\\n", " (cfg.epsilon_start - cfg.epsilon_end) * \\\n", " math.exp(-1. * frame_idx / cfg.epsilon_decay)\n", - " self.policy_net = DuelingNet(state_dim, action_dim,hidden_dim=cfg.hidden_dim).to(self.device)\n", - " self.target_net = DuelingNet(state_dim, action_dim,hidden_dim=cfg.hidden_dim).to(self.device)\n", + " self.policy_net = DuelingNet(n_states, n_actions,hidden_dim=cfg.hidden_dim).to(self.device)\n", + " self.target_net = DuelingNet(n_states, n_actions,hidden_dim=cfg.hidden_dim).to(self.device)\n", " for target_param, param in zip(self.target_net.parameters(),self.policy_net.parameters()): # 复制参数到目标网络targe_net\n", " target_param.data.copy_(param.data)\n", " self.optimizer = optim.Adam(self.policy_net.parameters(), lr=cfg.lr) # 优化器\n", @@ -214,7 +214,7 @@ " q_values = self.policy_net(state)\n", " action = q_values.max(1)[1].item() # 选择Q值最大的动作\n", " else:\n", - " action = random.randrange(self.action_dim)\n", + " action = random.randrange(self.n_actions)\n", " return action\n", " def update(self):\n", " if len(self.memory) < self.batch_size: # 当memory中不满足一个批量时,不更新策略\n", diff --git a/codes/Logs.md b/codes/Logs.md new file mode 100644 index 0000000..4efc3cd --- /dev/null +++ b/codes/Logs.md @@ -0,0 +1,5 @@ +## 记录笔者更新的日志 + +**2021.12.22-3**:将```agent.py```更改为对应的算法名称,便于区分如```dqn```与```dqn_cnn```的情况 +**2021.12.22-2**:简化了代码结构,将原来的```train.py```和```task.py```等合并到```task.py```中 +**2021.12.22-1**:简化了代码结构,将原来的```model.py```和```memory.py```等合并到```agent.py```中,```plot.py```的内容合并到```common.utils.py```中 \ No newline at end of file diff --git a/codes/MonteCarlo/agent.py b/codes/MonteCarlo/agent.py index 44af71d..bfe6940 100644 --- a/codes/MonteCarlo/agent.py +++ b/codes/MonteCarlo/agent.py @@ -17,11 +17,11 @@ import dill class FisrtVisitMC: ''' On-Policy First-Visit MC Control ''' - def __init__(self,action_dim,cfg): - self.action_dim = action_dim + def __init__(self,n_actions,cfg): + self.n_actions = n_actions self.epsilon = cfg.epsilon self.gamma = cfg.gamma - self.Q_table = defaultdict(lambda: np.zeros(action_dim)) + self.Q_table = defaultdict(lambda: np.zeros(n_actions)) self.returns_sum = defaultdict(float) # sum of returns self.returns_count = defaultdict(float) @@ -29,11 +29,11 @@ class FisrtVisitMC: ''' e-greed policy ''' if state in self.Q_table.keys(): best_action = np.argmax(self.Q_table[state]) - action_probs = np.ones(self.action_dim, dtype=float) * self.epsilon / self.action_dim + action_probs = np.ones(self.n_actions, dtype=float) * self.epsilon / self.n_actions action_probs[best_action] += (1.0 - self.epsilon) action = np.random.choice(np.arange(len(action_probs)), p=action_probs) else: - action = np.random.randint(0,self.action_dim) + action = np.random.randint(0,self.n_actions) return action def update(self,one_ep_transition): # Find all (state, action) pairs we've visited in this one_ep_transition diff --git a/codes/MonteCarlo/task0_train.py b/codes/MonteCarlo/task0_train.py index dae0c95..51858f8 100644 --- a/codes/MonteCarlo/task0_train.py +++ b/codes/MonteCarlo/task0_train.py @@ -43,8 +43,8 @@ 
class MCConfig: def env_agent_config(cfg,seed=1): env = RacetrackEnv() - action_dim = 9 - agent = FisrtVisitMC(action_dim, cfg) + n_actions = 9 + agent = FisrtVisitMC(n_actions, cfg) return env,agent def train(cfg, env, agent): diff --git a/codes/NoisyDQN/noisy_dqn.py b/codes/NoisyDQN/noisy_dqn.py new file mode 100644 index 0000000..45cc5d2 --- /dev/null +++ b/codes/NoisyDQN/noisy_dqn.py @@ -0,0 +1,52 @@ +import torch +import torch.nn as nn + +class NoisyLinear(nn.Module): + def __init__(self, input_dim, output_dim, std_init=0.4): + super(NoisyLinear, self).__init__() + + self.input_dim = input_dim + self.output_dim = output_dim + self.std_init = std_init + + self.weight_mu = nn.Parameter(torch.FloatTensor(output_dim, input_dim)) + self.weight_sigma = nn.Parameter(torch.FloatTensor(output_dim, input_dim)) + self.register_buffer('weight_epsilon', torch.FloatTensor(output_dim, input_dim)) + + self.bias_mu = nn.Parameter(torch.FloatTensor(output_dim)) + self.bias_sigma = nn.Parameter(torch.FloatTensor(output_dim)) + self.register_buffer('bias_epsilon', torch.FloatTensor(output_dim)) + + self.reset_parameters() + self.reset_noise() + + def forward(self, x): + if self.training: + weight = self.weight_mu + self.weight_sigma.mul( (self.weight_epsilon)) + bias = self.bias_mu + self.bias_sigma.mul(Variable(self.bias_epsilon)) + else: + weight = self.weight_mu + bias = self.bias_mu + + return F.linear(x, weight, bias) + + def reset_parameters(self): + mu_range = 1 / math.sqrt(self.weight_mu.size(1)) + + self.weight_mu.data.uniform_(-mu_range, mu_range) + self.weight_sigma.data.fill_(self.std_init / math.sqrt(self.weight_sigma.size(1))) + + self.bias_mu.data.uniform_(-mu_range, mu_range) + self.bias_sigma.data.fill_(self.std_init / math.sqrt(self.bias_sigma.size(0))) + + def reset_noise(self): + epsilon_in = self._scale_noise(self.input_dim) + epsilon_out = self._scale_noise(self.output_dim) + + self.weight_epsilon.copy_(epsilon_out.ger(epsilon_in)) + self.bias_epsilon.copy_(self._scale_noise(self.output_dim)) + + def _scale_noise(self, size): + x = torch.randn(size) + x = x.sign().mul(x.abs().sqrt()) + return x \ No newline at end of file diff --git a/codes/PPO/README.md b/codes/PPO/README.md index 66825c9..125ef51 100644 --- a/codes/PPO/README.md +++ b/codes/PPO/README.md @@ -57,16 +57,16 @@ model就是actor和critic两个网络了: import torch.nn as nn from torch.distributions.categorical import Categorical class Actor(nn.Module): - def __init__(self,state_dim, action_dim, + def __init__(self,n_states, n_actions, hidden_dim=256): super(Actor, self).__init__() self.actor = nn.Sequential( - nn.Linear(state_dim, hidden_dim), + nn.Linear(n_states, hidden_dim), nn.ReLU(), nn.Linear(hidden_dim, hidden_dim), nn.ReLU(), - nn.Linear(hidden_dim, action_dim), + nn.Linear(hidden_dim, n_actions), nn.Softmax(dim=-1) ) def forward(self, state): @@ -75,10 +75,10 @@ class Actor(nn.Module): return dist class Critic(nn.Module): - def __init__(self, state_dim,hidden_dim=256): + def __init__(self, n_states,hidden_dim=256): super(Critic, self).__init__() self.critic = nn.Sequential( - nn.Linear(state_dim, hidden_dim), + nn.Linear(n_states, hidden_dim), nn.ReLU(), nn.Linear(hidden_dim, hidden_dim), nn.ReLU(), @@ -88,7 +88,7 @@ class Critic(nn.Module): value = self.critic(state) return value ``` -这里Actor就是得到一个概率分布(Categorica,也可以是别的分布,可以搜索torch distributionsl),critc根据当前状态得到一个值,这里的输入维度可以是```state_dim+action_dim```,即将action信息也纳入critic网络中,这样会更好一些,感兴趣的小伙伴可以试试。 +这里Actor就是得到一个概率分布(Categorica,也可以是别的分布,可以搜索torch 
distributionsl),critc根据当前状态得到一个值,这里的输入维度可以是```n_states+n_actions```,即将action信息也纳入critic网络中,这样会更好一些,感兴趣的小伙伴可以试试。 ### PPO update 定义一个update函数主要实现伪代码中的第六步和第七步: diff --git a/codes/PPO/agent.py b/codes/PPO/agent.py index 0a7edd9..ebda626 100644 --- a/codes/PPO/agent.py +++ b/codes/PPO/agent.py @@ -16,15 +16,15 @@ import torch.optim as optim from PPO.model import Actor,Critic from PPO.memory import PPOMemory class PPO: - def __init__(self, state_dim, action_dim,cfg): + def __init__(self, n_states, n_actions,cfg): self.gamma = cfg.gamma self.continuous = cfg.continuous self.policy_clip = cfg.policy_clip self.n_epochs = cfg.n_epochs self.gae_lambda = cfg.gae_lambda self.device = cfg.device - self.actor = Actor(state_dim, action_dim,cfg.hidden_dim).to(self.device) - self.critic = Critic(state_dim,cfg.hidden_dim).to(self.device) + self.actor = Actor(n_states, n_actions,cfg.hidden_dim).to(self.device) + self.critic = Critic(n_states,cfg.hidden_dim).to(self.device) self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=cfg.actor_lr) self.critic_optimizer = optim.Adam(self.critic.parameters(), lr=cfg.critic_lr) self.memory = PPOMemory(cfg.batch_size) diff --git a/codes/PPO/model.py b/codes/PPO/model.py index fc182d5..612ddff 100644 --- a/codes/PPO/model.py +++ b/codes/PPO/model.py @@ -12,16 +12,16 @@ Environment: import torch.nn as nn from torch.distributions.categorical import Categorical class Actor(nn.Module): - def __init__(self,state_dim, action_dim, + def __init__(self,n_states, n_actions, hidden_dim): super(Actor, self).__init__() self.actor = nn.Sequential( - nn.Linear(state_dim, hidden_dim), + nn.Linear(n_states, hidden_dim), nn.ReLU(), nn.Linear(hidden_dim, hidden_dim), nn.ReLU(), - nn.Linear(hidden_dim, action_dim), + nn.Linear(hidden_dim, n_actions), nn.Softmax(dim=-1) ) def forward(self, state): @@ -30,10 +30,10 @@ class Actor(nn.Module): return dist class Critic(nn.Module): - def __init__(self, state_dim,hidden_dim): + def __init__(self, n_states,hidden_dim): super(Critic, self).__init__() self.critic = nn.Sequential( - nn.Linear(state_dim, hidden_dim), + nn.Linear(n_states, hidden_dim), nn.ReLU(), nn.Linear(hidden_dim, hidden_dim), nn.ReLU(), diff --git a/codes/PPO/task0.py b/codes/PPO/task0.py index 8e0d92a..15794ec 100644 --- a/codes/PPO/task0.py +++ b/codes/PPO/task0.py @@ -45,9 +45,9 @@ class PlotConfig: def env_agent_config(cfg,seed=1): env = gym.make(cfg.env_name) env.seed(seed) - state_dim = env.observation_space.shape[0] - action_dim = env.action_space.n - agent = PPO(state_dim,action_dim,cfg) + n_states = env.observation_space.shape[0] + n_actions = env.action_space.n + agent = PPO(n_states,n_actions,cfg) return env,agent cfg = PPOConfig() diff --git a/codes/PPO/task1.py b/codes/PPO/task1.py index 38d9152..00feb2f 100644 --- a/codes/PPO/task1.py +++ b/codes/PPO/task1.py @@ -45,9 +45,9 @@ class PlotConfig: def env_agent_config(cfg,seed=1): env = gym.make(cfg.env_name) env.seed(seed) - state_dim = env.observation_space.shape[0] - action_dim = env.action_space.shape[0] - agent = PPO(state_dim,action_dim,cfg) + n_states = env.observation_space.shape[0] + n_actions = env.action_space.shape[0] + agent = PPO(n_states,n_actions,cfg) return env,agent diff --git a/codes/PPO/train.ipynb b/codes/PPO/train.ipynb index b2dc91a..2fe6570 100644 --- a/codes/PPO/train.ipynb +++ b/codes/PPO/train.ipynb @@ -90,9 +90,9 @@ "def env_agent_config(cfg,seed=1):\n", " env = gym.make(cfg.env) \n", " env.seed(seed)\n", - " state_dim = env.observation_space.shape[0]\n", - " action_dim = 
env.action_space.n\n", - " agent = PPO(state_dim,action_dim,cfg)\n", + " n_states = env.observation_space.shape[0]\n", + " n_actions = env.action_space.n\n", + " agent = PPO(n_states,n_actions,cfg)\n", " return env,agent" ] }, diff --git a/codes/PPO/train.py b/codes/PPO/train.py index e642df0..b97e287 100644 --- a/codes/PPO/train.py +++ b/codes/PPO/train.py @@ -99,9 +99,9 @@ if __name__ == '__main__': def env_agent_config(cfg,seed=1): env = gym.make(cfg.env_name) env.seed(seed) - state_dim = env.observation_space.shape[0] - action_dim = env.action_space.n - agent = PPO(state_dim,action_dim,cfg) + n_states = env.observation_space.shape[0] + n_actions = env.action_space.n + agent = PPO(n_states,n_actions,cfg) return env,agent cfg = PPOConfig() diff --git a/codes/PolicyGradient/agent.py b/codes/PolicyGradient/agent.py index 8f349b5..fa63ba0 100644 --- a/codes/PolicyGradient/agent.py +++ b/codes/PolicyGradient/agent.py @@ -17,9 +17,9 @@ from PolicyGradient.model import MLP class PolicyGradient: - def __init__(self, state_dim,cfg): + def __init__(self, n_states,cfg): self.gamma = cfg.gamma - self.policy_net = MLP(state_dim,hidden_dim=cfg.hidden_dim) + self.policy_net = MLP(n_states,hidden_dim=cfg.hidden_dim) self.optimizer = torch.optim.RMSprop(self.policy_net.parameters(), lr=cfg.lr) self.batch_size = cfg.batch_size diff --git a/codes/PolicyGradient/model.py b/codes/PolicyGradient/model.py index 6d9bc64..97d9935 100644 --- a/codes/PolicyGradient/model.py +++ b/codes/PolicyGradient/model.py @@ -19,7 +19,7 @@ class MLP(nn.Module): ''' def __init__(self,input_dim,hidden_dim = 36): super(MLP, self).__init__() - # 24和36为hidden layer的层数,可根据input_dim, action_dim的情况来改变 + # 24和36为hidden layer的层数,可根据input_dim, n_actions的情况来改变 self.fc1 = nn.Linear(input_dim, hidden_dim) self.fc2 = nn.Linear(hidden_dim,hidden_dim) self.fc3 = nn.Linear(hidden_dim, 1) # Prob of Left diff --git a/codes/PolicyGradient/task0_train.py b/codes/PolicyGradient/task0_train.py index b6866f0..1025a91 100644 --- a/codes/PolicyGradient/task0_train.py +++ b/codes/PolicyGradient/task0_train.py @@ -46,8 +46,8 @@ class PGConfig: def env_agent_config(cfg,seed=1): env = gym.make(cfg.env) env.seed(seed) - state_dim = env.observation_space.shape[0] - agent = PolicyGradient(state_dim,cfg) + n_states = env.observation_space.shape[0] + agent = PolicyGradient(n_states,cfg) return env,agent def train(cfg,env,agent): diff --git a/codes/README.md b/codes/README.md index 355127c..3896fbb 100644 --- a/codes/README.md +++ b/codes/README.md @@ -16,7 +16,7 @@ **注意:新版本中将```model```,```memory```相关内容全部放到了```agent.py```里面,```plot```放到了```common.utils```中。** ## 运行环境 -python 3.7、pytorch 1.6.0-1.8.1、gym 0.17.0-0.19.0 +python 3.7、pytorch 1.6.0-1.8.1、gym 0.21.0 ## 使用说明 @@ -36,7 +36,7 @@ python 3.7、pytorch 1.6.0-1.8.1、gym 0.17.0-0.19.0 | [Hierarchical DQN](HierarchicalDQN) | [H-DQN Paper](https://arxiv.org/abs/1604.06057) | [CartPole-v0](./envs/gym_info.md) | | | [PolicyGradient](./PolicyGradient) | [Lil'log](https://lilianweng.github.io/lil-log/2018/04/08/policy-gradient-algorithms.html) | [CartPole-v0](./envs/gym_info.md) | | | [A2C](./A2C) | [A3C Paper](https://arxiv.org/abs/1602.01783) | [CartPole-v0](./envs/gym_info.md) | | -| [SAC](./SAC) | [SAC Paper](https://arxiv.org/abs/1801.01290) | [Pendulum-v0](./envs/gym_info.md) | | +| [SAC](./SoftActorCritic) | [SAC Paper](https://arxiv.org/abs/1801.01290) | [Pendulum-v0](./envs/gym_info.md) | | | [PPO](./PPO) | [PPO paper](https://arxiv.org/abs/1707.06347) | [CartPole-v0](./envs/gym_info.md) | | | [DDPG](./DDPG) | 
[DDPG Paper](https://arxiv.org/abs/1509.02971) | [Pendulum-v0](./envs/gym_info.md) | | | [TD3](./TD3) | [TD3 Paper](https://arxiv.org/abs/1802.09477) | [HalfCheetah-v2]((./envs/mujoco_info.md)) | | diff --git a/codes/SAC/agent.py b/codes/SAC/agent.py deleted file mode 100644 index 1568eb3..0000000 --- a/codes/SAC/agent.py +++ /dev/null @@ -1,110 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -''' -Author: JiangJi -Email: johnjim0816@gmail.com -Date: 2021-04-29 12:53:54 -LastEditor: JiangJi -LastEditTime: 2021-04-29 13:56:39 -Discription: -Environment: -''' -import copy -import torch -import torch.nn as nn -import torch.optim as optim -import numpy as np -from common.memory import ReplayBuffer -from SAC.model import ValueNet,PolicyNet,SoftQNet - -class SAC: - def __init__(self,state_dim,action_dim,cfg) -> None: - self.batch_size = cfg.batch_size - self.memory = ReplayBuffer(cfg.capacity) - self.device = cfg.device - self.value_net = ValueNet(state_dim, cfg.hidden_dim).to(self.device) - self.target_value_net = ValueNet(state_dim, cfg.hidden_dim).to(self.device) - self.soft_q_net = SoftQNet(state_dim, action_dim, cfg.hidden_dim).to(self.device) - self.policy_net = PolicyNet(state_dim, action_dim, cfg.hidden_dim).to(self.device) - self.value_optimizer = optim.Adam(self.value_net.parameters(), lr=cfg.value_lr) - self.soft_q_optimizer = optim.Adam(self.soft_q_net.parameters(), lr=cfg.soft_q_lr) - self.policy_optimizer = optim.Adam(self.policy_net.parameters(), lr=cfg.policy_lr) - for target_param, param in zip(self.target_value_net.parameters(), self.value_net.parameters()): - target_param.data.copy_(param.data) - self.value_criterion = nn.MSELoss() - self.soft_q_criterion = nn.MSELoss() - def update(self, gamma=0.99,mean_lambda=1e-3, - std_lambda=1e-3, - z_lambda=0.0, - soft_tau=1e-2, - ): - if len(self.memory) < self.batch_size: - return - state, action, reward, next_state, done = self.memory.sample(self.batch_size) - state = torch.FloatTensor(state).to(self.device) - next_state = torch.FloatTensor(next_state).to(self.device) - action = torch.FloatTensor(action).to(self.device) - reward = torch.FloatTensor(reward).unsqueeze(1).to(self.device) - done = torch.FloatTensor(np.float32(done)).unsqueeze(1).to(self.device) - expected_q_value = self.soft_q_net(state, action) - expected_value = self.value_net(state) - new_action, log_prob, z, mean, log_std = self.policy_net.evaluate(state) - - - target_value = self.target_value_net(next_state) - next_q_value = reward + (1 - done) * gamma * target_value - q_value_loss = self.soft_q_criterion(expected_q_value, next_q_value.detach()) - - expected_new_q_value = self.soft_q_net(state, new_action) - next_value = expected_new_q_value - log_prob - value_loss = self.value_criterion(expected_value, next_value.detach()) - - log_prob_target = expected_new_q_value - expected_value - policy_loss = (log_prob * (log_prob - log_prob_target).detach()).mean() - - - mean_loss = mean_lambda * mean.pow(2).mean() - std_loss = std_lambda * log_std.pow(2).mean() - z_loss = z_lambda * z.pow(2).sum(1).mean() - - policy_loss += mean_loss + std_loss + z_loss - - self.soft_q_optimizer.zero_grad() - q_value_loss.backward() - self.soft_q_optimizer.step() - - self.value_optimizer.zero_grad() - value_loss.backward() - self.value_optimizer.step() - - self.policy_optimizer.zero_grad() - policy_loss.backward() - self.policy_optimizer.step() - - - for target_param, param in zip(self.target_value_net.parameters(), self.value_net.parameters()): - target_param.data.copy_( - target_param.data * 
(1.0 - soft_tau) + param.data * soft_tau - ) - def save(self, path): - torch.save(self.value_net.state_dict(), path + "sac_value") - torch.save(self.value_optimizer.state_dict(), path + "sac_value_optimizer") - - torch.save(self.soft_q_net.state_dict(), path + "sac_soft_q") - torch.save(self.soft_q_optimizer.state_dict(), path + "sac_soft_q_optimizer") - - torch.save(self.policy_net.state_dict(), path + "sac_policy") - torch.save(self.policy_optimizer.state_dict(), path + "sac_policy_optimizer") - - - - def load(self, path): - self.value_net.load_state_dict(torch.load(path + "sac_value")) - self.value_optimizer.load_state_dict(torch.load(path + "sac_value_optimizer")) - self.target_value_net = copy.deepcopy(self.value_net) - - self.soft_q_net.load_state_dict(torch.load(path + "sac_soft_q")) - self.soft_q_optimizer.load_state_dict(torch.load(path + "sac_soft_q_optimizer")) - - self.policy_net.load_state_dict(torch.load(path + "sac_policy")) - self.policy_optimizer.load_state_dict(torch.load(path + "sac_policy_optimizer")) \ No newline at end of file diff --git a/codes/SAC/outputs/Pendulum-v0/20210506-014740/models/sac_policy b/codes/SAC/outputs/Pendulum-v0/20210506-014740/models/sac_policy deleted file mode 100644 index 12479e2..0000000 Binary files a/codes/SAC/outputs/Pendulum-v0/20210506-014740/models/sac_policy and /dev/null differ diff --git a/codes/SAC/outputs/Pendulum-v0/20210506-014740/models/sac_policy_optimizer b/codes/SAC/outputs/Pendulum-v0/20210506-014740/models/sac_policy_optimizer deleted file mode 100644 index 6dea232..0000000 Binary files a/codes/SAC/outputs/Pendulum-v0/20210506-014740/models/sac_policy_optimizer and /dev/null differ diff --git a/codes/SAC/outputs/Pendulum-v0/20210506-014740/models/sac_soft_q b/codes/SAC/outputs/Pendulum-v0/20210506-014740/models/sac_soft_q deleted file mode 100644 index d2d5352..0000000 Binary files a/codes/SAC/outputs/Pendulum-v0/20210506-014740/models/sac_soft_q and /dev/null differ diff --git a/codes/SAC/outputs/Pendulum-v0/20210506-014740/models/sac_soft_q_optimizer b/codes/SAC/outputs/Pendulum-v0/20210506-014740/models/sac_soft_q_optimizer deleted file mode 100644 index d4c3e48..0000000 Binary files a/codes/SAC/outputs/Pendulum-v0/20210506-014740/models/sac_soft_q_optimizer and /dev/null differ diff --git a/codes/SAC/outputs/Pendulum-v0/20210506-014740/models/sac_value b/codes/SAC/outputs/Pendulum-v0/20210506-014740/models/sac_value deleted file mode 100644 index a180f73..0000000 Binary files a/codes/SAC/outputs/Pendulum-v0/20210506-014740/models/sac_value and /dev/null differ diff --git a/codes/SAC/outputs/Pendulum-v0/20210506-014740/models/sac_value_optimizer b/codes/SAC/outputs/Pendulum-v0/20210506-014740/models/sac_value_optimizer deleted file mode 100644 index f2ab113..0000000 Binary files a/codes/SAC/outputs/Pendulum-v0/20210506-014740/models/sac_value_optimizer and /dev/null differ diff --git a/codes/SAC/outputs/Pendulum-v0/20210506-014740/results/eval_ma_rewards.npy b/codes/SAC/outputs/Pendulum-v0/20210506-014740/results/eval_ma_rewards.npy deleted file mode 100644 index 4971d4f..0000000 Binary files a/codes/SAC/outputs/Pendulum-v0/20210506-014740/results/eval_ma_rewards.npy and /dev/null differ diff --git a/codes/SAC/outputs/Pendulum-v0/20210506-014740/results/eval_rewards.npy b/codes/SAC/outputs/Pendulum-v0/20210506-014740/results/eval_rewards.npy deleted file mode 100644 index 46bd706..0000000 Binary files a/codes/SAC/outputs/Pendulum-v0/20210506-014740/results/eval_rewards.npy and /dev/null differ diff --git 
a/codes/SAC/outputs/Pendulum-v0/20210506-014740/results/eval_rewards_curve.png b/codes/SAC/outputs/Pendulum-v0/20210506-014740/results/eval_rewards_curve.png deleted file mode 100644 index 3d4dd84..0000000 Binary files a/codes/SAC/outputs/Pendulum-v0/20210506-014740/results/eval_rewards_curve.png and /dev/null differ diff --git a/codes/SAC/outputs/Pendulum-v0/20210506-014740/results/train_ma_rewards.npy b/codes/SAC/outputs/Pendulum-v0/20210506-014740/results/train_ma_rewards.npy deleted file mode 100644 index bffae05..0000000 Binary files a/codes/SAC/outputs/Pendulum-v0/20210506-014740/results/train_ma_rewards.npy and /dev/null differ diff --git a/codes/SAC/outputs/Pendulum-v0/20210506-014740/results/train_rewards.npy b/codes/SAC/outputs/Pendulum-v0/20210506-014740/results/train_rewards.npy deleted file mode 100644 index 37837a6..0000000 Binary files a/codes/SAC/outputs/Pendulum-v0/20210506-014740/results/train_rewards.npy and /dev/null differ diff --git a/codes/SAC/outputs/Pendulum-v0/20210506-014740/results/train_rewards_curve.png b/codes/SAC/outputs/Pendulum-v0/20210506-014740/results/train_rewards_curve.png deleted file mode 100644 index 399b952..0000000 Binary files a/codes/SAC/outputs/Pendulum-v0/20210506-014740/results/train_rewards_curve.png and /dev/null differ diff --git a/codes/Sarsa/agent.py b/codes/Sarsa/agent.py index 020f6da..3753381 100644 --- a/codes/Sarsa/agent.py +++ b/codes/Sarsa/agent.py @@ -14,17 +14,17 @@ from collections import defaultdict import torch class Sarsa(object): def __init__(self, - action_dim,sarsa_cfg,): - self.action_dim = action_dim # number of actions + n_actions,sarsa_cfg,): + self.n_actions = n_actions # number of actions self.lr = sarsa_cfg.lr # learning rate self.gamma = sarsa_cfg.gamma self.epsilon = sarsa_cfg.epsilon - self.Q = defaultdict(lambda: np.zeros(action_dim)) - # self.Q = np.zeros((state_dim, action_dim)) # Q表 + self.Q = defaultdict(lambda: np.zeros(n_actions)) + # self.Q = np.zeros((n_states, n_actions)) # Q表 def choose_action(self, state): best_action = np.argmax(self.Q[state]) # action = best_action - action_probs = np.ones(self.action_dim, dtype=float) * self.epsilon / self.action_dim + action_probs = np.ones(self.n_actions, dtype=float) * self.epsilon / self.n_actions action_probs[best_action] += (1.0 - self.epsilon) action = np.random.choice(np.arange(len(action_probs)), p=action_probs) return action diff --git a/codes/Sarsa/task0_train.py b/codes/Sarsa/task0_train.py index e477afa..38fc598 100644 --- a/codes/Sarsa/task0_train.py +++ b/codes/Sarsa/task0_train.py @@ -39,8 +39,8 @@ class SarsaConfig: def env_agent_config(cfg,seed=1): env = RacetrackEnv() - action_dim=9 - agent = Sarsa(action_dim,cfg) + n_actions=9 + agent = Sarsa(n_actions,cfg) return env,agent def train(cfg,env,agent): diff --git a/codes/SAC/env.py b/codes/SoftActorCritic/env_wrapper.py similarity index 95% rename from codes/SAC/env.py rename to codes/SoftActorCritic/env_wrapper.py index 14e37a7..dfe1c4d 100644 --- a/codes/SAC/env.py +++ b/codes/SoftActorCritic/env_wrapper.py @@ -5,12 +5,13 @@ Author: JiangJi Email: johnjim0816@gmail.com Date: 2021-04-29 12:52:11 LastEditor: JiangJi -LastEditTime: 2021-04-29 12:52:31 +LastEditTime: 2021-12-22 15:36:36 Discription: Environment: ''' import gym import numpy as np + class NormalizedActions(gym.ActionWrapper): def action(self, action): low = self.action_space.low diff --git a/codes/SAC/model.py b/codes/SoftActorCritic/model.py similarity index 84% rename from codes/SAC/model.py rename to codes/SoftActorCritic/model.py 
index 85bbfcd..ba04737 100644 --- a/codes/SAC/model.py +++ b/codes/SoftActorCritic/model.py @@ -17,10 +17,10 @@ from torch.distributions import Normal device=torch.device("cuda" if torch.cuda.is_available() else "cpu") class ValueNet(nn.Module): - def __init__(self, state_dim, hidden_dim, init_w=3e-3): + def __init__(self, n_states, hidden_dim, init_w=3e-3): super(ValueNet, self).__init__() - self.linear1 = nn.Linear(state_dim, hidden_dim) + self.linear1 = nn.Linear(n_states, hidden_dim) self.linear2 = nn.Linear(hidden_dim, hidden_dim) self.linear3 = nn.Linear(hidden_dim, 1) @@ -35,10 +35,10 @@ class ValueNet(nn.Module): class SoftQNet(nn.Module): - def __init__(self, state_dim, action_dim, hidden_dim, init_w=3e-3): + def __init__(self, n_states, n_actions, hidden_dim, init_w=3e-3): super(SoftQNet, self).__init__() - self.linear1 = nn.Linear(state_dim + action_dim, hidden_dim) + self.linear1 = nn.Linear(n_states + n_actions, hidden_dim) self.linear2 = nn.Linear(hidden_dim, hidden_dim) self.linear3 = nn.Linear(hidden_dim, 1) @@ -54,20 +54,20 @@ class SoftQNet(nn.Module): class PolicyNet(nn.Module): - def __init__(self, state_dim, action_dim, hidden_dim, init_w=3e-3, log_std_min=-20, log_std_max=2): + def __init__(self, n_states, n_actions, hidden_dim, init_w=3e-3, log_std_min=-20, log_std_max=2): super(PolicyNet, self).__init__() self.log_std_min = log_std_min self.log_std_max = log_std_max - self.linear1 = nn.Linear(state_dim, hidden_dim) + self.linear1 = nn.Linear(n_states, hidden_dim) self.linear2 = nn.Linear(hidden_dim, hidden_dim) - self.mean_linear = nn.Linear(hidden_dim, action_dim) + self.mean_linear = nn.Linear(hidden_dim, n_actions) self.mean_linear.weight.data.uniform_(-init_w, init_w) self.mean_linear.bias.data.uniform_(-init_w, init_w) - self.log_std_linear = nn.Linear(hidden_dim, action_dim) + self.log_std_linear = nn.Linear(hidden_dim, n_actions) self.log_std_linear.weight.data.uniform_(-init_w, init_w) self.log_std_linear.bias.data.uniform_(-init_w, init_w) diff --git a/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_policy b/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_policy new file mode 100644 index 0000000..9ae4e7b Binary files /dev/null and b/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_policy differ diff --git a/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_policy_optimizer b/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_policy_optimizer new file mode 100644 index 0000000..49c0d2a Binary files /dev/null and b/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_policy_optimizer differ diff --git a/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_soft_q b/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_soft_q new file mode 100644 index 0000000..3ff692f Binary files /dev/null and b/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_soft_q differ diff --git a/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_soft_q_optimizer b/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_soft_q_optimizer new file mode 100644 index 0000000..73be931 Binary files /dev/null and b/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_soft_q_optimizer differ diff --git a/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_value b/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_value new file mode 100644 index 
0000000..853ac6f Binary files /dev/null and b/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_value differ diff --git a/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_value_optimizer b/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_value_optimizer new file mode 100644 index 0000000..79410e4 Binary files /dev/null and b/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_value_optimizer differ diff --git a/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/test_ma_rewards.npy b/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/test_ma_rewards.npy new file mode 100644 index 0000000..eca3369 Binary files /dev/null and b/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/test_ma_rewards.npy differ diff --git a/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/test_rewards.npy b/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/test_rewards.npy new file mode 100644 index 0000000..09edb0e Binary files /dev/null and b/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/test_rewards.npy differ diff --git a/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/test_rewards_curve.png b/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/test_rewards_curve.png new file mode 100644 index 0000000..5cc6e1d Binary files /dev/null and b/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/test_rewards_curve.png differ diff --git a/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/train_ma_rewards.npy b/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/train_ma_rewards.npy new file mode 100644 index 0000000..3e1feac Binary files /dev/null and b/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/train_ma_rewards.npy differ diff --git a/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/train_rewards.npy b/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/train_rewards.npy new file mode 100644 index 0000000..1c77a83 Binary files /dev/null and b/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/train_rewards.npy differ diff --git a/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/train_rewards_curve.png b/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/train_rewards_curve.png new file mode 100644 index 0000000..3e4c8aa Binary files /dev/null and b/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/train_rewards_curve.png differ diff --git a/codes/SoftActorCritic/sac.py b/codes/SoftActorCritic/sac.py new file mode 100644 index 0000000..c67257f --- /dev/null +++ b/codes/SoftActorCritic/sac.py @@ -0,0 +1,222 @@ +#!/usr/bin/env python +# coding=utf-8 +''' +Author: JiangJi +Email: johnjim0816@gmail.com +Date: 2021-04-29 12:53:54 +LastEditor: JiangJi +LastEditTime: 2021-12-22 15:41:19 +Discription: +Environment: +''' +import copy +import torch +import torch.nn as nn +import torch.optim as optim +import torch.nn.functional as F +from torch.distributions import Normal +import numpy as np +import random +device=torch.device("cuda" if torch.cuda.is_available() else "cpu") +class ReplayBuffer: + def __init__(self, capacity): + self.capacity = capacity # 经验回放的容量 + self.buffer = [] # 缓冲区 + self.position = 0 + + def push(self, state, action, reward, next_state, done): + ''' 缓冲区是一个队列,容量超出时去掉开始存入的转移(transition) + ''' + if len(self.buffer) < self.capacity: + 
self.buffer.append(None) + self.buffer[self.position] = (state, action, reward, next_state, done) + self.position = (self.position + 1) % self.capacity + + def sample(self, batch_size): + batch = random.sample(self.buffer, batch_size) # 随机采出小批量转移 + state, action, reward, next_state, done = zip(*batch) # 解压成状态,动作等 + return state, action, reward, next_state, done + + def __len__(self): + ''' 返回当前存储的量 + ''' + return len(self.buffer) + +class ValueNet(nn.Module): + def __init__(self, n_states, hidden_dim, init_w=3e-3): + super(ValueNet, self).__init__() + + self.linear1 = nn.Linear(n_states, hidden_dim) + self.linear2 = nn.Linear(hidden_dim, hidden_dim) + self.linear3 = nn.Linear(hidden_dim, 1) + + self.linear3.weight.data.uniform_(-init_w, init_w) + self.linear3.bias.data.uniform_(-init_w, init_w) + + def forward(self, state): + x = F.relu(self.linear1(state)) + x = F.relu(self.linear2(x)) + x = self.linear3(x) + return x + + +class SoftQNet(nn.Module): + def __init__(self, n_states, n_actions, hidden_dim, init_w=3e-3): + super(SoftQNet, self).__init__() + + self.linear1 = nn.Linear(n_states + n_actions, hidden_dim) + self.linear2 = nn.Linear(hidden_dim, hidden_dim) + self.linear3 = nn.Linear(hidden_dim, 1) + + self.linear3.weight.data.uniform_(-init_w, init_w) + self.linear3.bias.data.uniform_(-init_w, init_w) + + def forward(self, state, action): + x = torch.cat([state, action], 1) + x = F.relu(self.linear1(x)) + x = F.relu(self.linear2(x)) + x = self.linear3(x) + return x + + +class PolicyNet(nn.Module): + def __init__(self, n_states, n_actions, hidden_dim, init_w=3e-3, log_std_min=-20, log_std_max=2): + super(PolicyNet, self).__init__() + + self.log_std_min = log_std_min + self.log_std_max = log_std_max + + self.linear1 = nn.Linear(n_states, hidden_dim) + self.linear2 = nn.Linear(hidden_dim, hidden_dim) + + self.mean_linear = nn.Linear(hidden_dim, n_actions) + self.mean_linear.weight.data.uniform_(-init_w, init_w) + self.mean_linear.bias.data.uniform_(-init_w, init_w) + + self.log_std_linear = nn.Linear(hidden_dim, n_actions) + self.log_std_linear.weight.data.uniform_(-init_w, init_w) + self.log_std_linear.bias.data.uniform_(-init_w, init_w) + + def forward(self, state): + x = F.relu(self.linear1(state)) + x = F.relu(self.linear2(x)) + + mean = self.mean_linear(x) + log_std = self.log_std_linear(x) + log_std = torch.clamp(log_std, self.log_std_min, self.log_std_max) + + return mean, log_std + + def evaluate(self, state, epsilon=1e-6): + mean, log_std = self.forward(state) + std = log_std.exp() + + normal = Normal(mean, std) + z = normal.sample() + action = torch.tanh(z) + + log_prob = normal.log_prob(z) - torch.log(1 - action.pow(2) + epsilon) + log_prob = log_prob.sum(-1, keepdim=True) + + return action, log_prob, z, mean, log_std + + + def get_action(self, state): + state = torch.FloatTensor(state).unsqueeze(0).to(device) + mean, log_std = self.forward(state) + std = log_std.exp() + + normal = Normal(mean, std) + z = normal.sample() + action = torch.tanh(z) + + action = action.detach().cpu().numpy() + return action[0] + +class SAC: + def __init__(self,n_states,n_actions,cfg) -> None: + self.batch_size = cfg.batch_size + self.memory = ReplayBuffer(cfg.capacity) + self.device = cfg.device + self.value_net = ValueNet(n_states, cfg.hidden_dim).to(self.device) + self.target_value_net = ValueNet(n_states, cfg.hidden_dim).to(self.device) + self.soft_q_net = SoftQNet(n_states, n_actions, cfg.hidden_dim).to(self.device) + self.policy_net = PolicyNet(n_states, n_actions, 
cfg.hidden_dim).to(self.device) + self.value_optimizer = optim.Adam(self.value_net.parameters(), lr=cfg.value_lr) + self.soft_q_optimizer = optim.Adam(self.soft_q_net.parameters(), lr=cfg.soft_q_lr) + self.policy_optimizer = optim.Adam(self.policy_net.parameters(), lr=cfg.policy_lr) + for target_param, param in zip(self.target_value_net.parameters(), self.value_net.parameters()): + target_param.data.copy_(param.data) + self.value_criterion = nn.MSELoss() + self.soft_q_criterion = nn.MSELoss() + def update(self, gamma=0.99,mean_lambda=1e-3, + std_lambda=1e-3, + z_lambda=0.0, + soft_tau=1e-2, + ): + if len(self.memory) < self.batch_size: + return + state, action, reward, next_state, done = self.memory.sample(self.batch_size) + state = torch.FloatTensor(state).to(self.device) + next_state = torch.FloatTensor(next_state).to(self.device) + action = torch.FloatTensor(action).to(self.device) + reward = torch.FloatTensor(reward).unsqueeze(1).to(self.device) + done = torch.FloatTensor(np.float32(done)).unsqueeze(1).to(self.device) + expected_q_value = self.soft_q_net(state, action) + expected_value = self.value_net(state) + new_action, log_prob, z, mean, log_std = self.policy_net.evaluate(state) + + + target_value = self.target_value_net(next_state) + next_q_value = reward + (1 - done) * gamma * target_value + q_value_loss = self.soft_q_criterion(expected_q_value, next_q_value.detach()) + + expected_new_q_value = self.soft_q_net(state, new_action) + next_value = expected_new_q_value - log_prob + value_loss = self.value_criterion(expected_value, next_value.detach()) + + log_prob_target = expected_new_q_value - expected_value + policy_loss = (log_prob * (log_prob - log_prob_target).detach()).mean() + + + mean_loss = mean_lambda * mean.pow(2).mean() + std_loss = std_lambda * log_std.pow(2).mean() + z_loss = z_lambda * z.pow(2).sum(1).mean() + + policy_loss += mean_loss + std_loss + z_loss + + self.soft_q_optimizer.zero_grad() + q_value_loss.backward() + self.soft_q_optimizer.step() + + self.value_optimizer.zero_grad() + value_loss.backward() + self.value_optimizer.step() + + self.policy_optimizer.zero_grad() + policy_loss.backward() + self.policy_optimizer.step() + + for target_param, param in zip(self.target_value_net.parameters(), self.value_net.parameters()): + target_param.data.copy_( + target_param.data * (1.0 - soft_tau) + param.data * soft_tau + ) + def save(self, path): + torch.save(self.value_net.state_dict(), path + "sac_value") + torch.save(self.value_optimizer.state_dict(), path + "sac_value_optimizer") + torch.save(self.soft_q_net.state_dict(), path + "sac_soft_q") + torch.save(self.soft_q_optimizer.state_dict(), path + "sac_soft_q_optimizer") + + torch.save(self.policy_net.state_dict(), path + "sac_policy") + torch.save(self.policy_optimizer.state_dict(), path + "sac_policy_optimizer") + + def load(self, path): + self.value_net.load_state_dict(torch.load(path + "sac_value")) + self.value_optimizer.load_state_dict(torch.load(path + "sac_value_optimizer")) + self.target_value_net = copy.deepcopy(self.value_net) + + self.soft_q_net.load_state_dict(torch.load(path + "sac_soft_q")) + self.soft_q_optimizer.load_state_dict(torch.load(path + "sac_soft_q_optimizer")) + + self.policy_net.load_state_dict(torch.load(path + "sac_policy")) + self.policy_optimizer.load_state_dict(torch.load(path + "sac_policy_optimizer")) \ No newline at end of file diff --git a/codes/SAC/task0_train.py b/codes/SoftActorCritic/task0.py similarity index 50% rename from codes/SAC/task0_train.py rename to 
codes/SoftActorCritic/task0.py index 719b668..668d289 100644 --- a/codes/SAC/task0_train.py +++ b/codes/SoftActorCritic/task0.py @@ -5,7 +5,7 @@ Author: JiangJi Email: johnjim0816@gmail.com Date: 2021-04-29 12:59:22 LastEditor: JiangJi -LastEditTime: 2021-05-06 16:58:01 +LastEditTime: 2021-12-22 16:27:13 Discription: Environment: ''' @@ -18,23 +18,24 @@ import gym import torch import datetime -from SAC.env import NormalizedActions -from SAC.agent import SAC +from SoftActorCritic.env_wrapper import NormalizedActions +from SoftActorCritic.sac import SAC from common.utils import save_results, make_dir -from common.plot import plot_rewards +from common.utils import plot_rewards curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # 获取当前时间 +algo_name = 'SAC' # 算法名称 +env_name = 'Pendulum-v1' # 环境名称 +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # 检测GPU class SACConfig: def __init__(self) -> None: - self.algo = 'SAC' - self.env_name = 'Pendulum-v1' - self.result_path = curr_path+"/outputs/" +self.env_name+'/'+curr_time+'/results/' # path to save results - self.model_path = curr_path+"/outputs/" +self.env_name+'/'+curr_time+'/models/' # path to save models + self.algo_name = algo_name + self.env_name = env_name # 环境名称 + self.device= device self.train_eps = 300 - self.train_steps = 500 - self.test_eps = 50 - self.eval_steps = 500 + self.test_eps = 20 + self.max_steps = 500 # 每回合的最大步数 self.gamma = 0.99 self.mean_lambda=1e-3 self.std_lambda=1e-3 @@ -46,33 +47,36 @@ class SACConfig: self.capacity = 1000000 self.hidden_dim = 256 self.batch_size = 128 - self.device=torch.device("cuda" if torch.cuda.is_available() else "cpu") -class PlotConfig(SACConfig): - def __init__(self) -> None: - super().__init__() - self.result_path = curr_path+"/outputs/" + self.env_name + \ - '/'+curr_time+'/results/' # 保存结果的路径 - self.model_path = curr_path+"/outputs/" + self.env_name + \ - '/'+curr_time+'/models/' # 保存模型的路径 - self.save = True # 是否保存图片 + + +class PlotConfig: + def __init__(self) -> None: + self.algo_name = algo_name # 算法名称 + self.env_name = env_name # 环境名称 + self.device= device + self.result_path = curr_path + "/outputs/" + self.env_name + \ + '/' + curr_time + '/results/' # 保存结果的路径 + self.model_path = curr_path + "/outputs/" + self.env_name + \ + '/' + curr_time + '/models/' # 保存模型的路径 + self.save = True # 是否保存图片 def env_agent_config(cfg,seed=1): env = NormalizedActions(gym.make(cfg.env_name)) env.seed(seed) - action_dim = env.action_space.shape[0] - state_dim = env.observation_space.shape[0] - agent = SAC(state_dim,action_dim,cfg) + n_actions = env.action_space.shape[0] + n_states = env.observation_space.shape[0] + agent = SAC(n_states,n_actions,cfg) return env,agent def train(cfg,env,agent): print('开始训练!') - print(f'环境:{cfg.env_name}, 算法:{cfg.algo}, 设备:{cfg.device}') + print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}') rewards = [] # 记录所有回合的奖励 ma_rewards = [] # 记录所有回合的滑动平均奖励 for i_ep in range(cfg.train_eps): ep_reward = 0 # 记录一回合内的奖励 state = env.reset() # 重置环境,返回初始状态 - for i_step in range(cfg.train_steps): + for i_step in range(cfg.max_steps): action = agent.policy_net.get_action(state) next_state, reward, done, _ = env.step(action) agent.memory.push(state, action, reward, next_state, done) @@ -81,57 +85,57 @@ def train(cfg,env,agent): ep_reward += reward if done: break - if (i_ep+1)%10==0: - print(f"Episode:{i_ep+1}/{cfg.train_eps}, Reward:{ep_reward:.3f}") rewards.append(ep_reward) if ma_rewards: ma_rewards.append(0.9*ma_rewards[-1]+0.1*ep_reward) else: 
ma_rewards.append(ep_reward) - print('Complete training!') + if (i_ep+1)%10 == 0: + print(f'回合:{i_ep+1}/{cfg.train_eps}, 奖励:{ep_reward:.3f}') + print('完成训练!') return rewards, ma_rewards -def eval(cfg,env,agent): - print('Start to eval !') - print(f'Env: {cfg.env_name}, Algorithm: {cfg.algo}, Device: {cfg.device}') - rewards = [] - ma_rewards = [] # moveing average reward +def test(cfg,env,agent): + print('开始测试!') + print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}') + rewards = [] # 记录所有回合的奖励 + ma_rewards = [] # 记录所有回合的滑动平均奖励 for i_ep in range(cfg.test_eps): state = env.reset() ep_reward = 0 - for i_step in range(cfg.eval_steps): + for i_step in range(cfg.max_steps): action = agent.policy_net.get_action(state) next_state, reward, done, _ = env.step(action) state = next_state ep_reward += reward if done: break - if (i_ep+1)%10==0: - print(f"Episode:{i_ep+1}/{cfg.train_eps}, Reward:{ep_reward:.3f}") rewards.append(ep_reward) if ma_rewards: ma_rewards.append(0.9*ma_rewards[-1]+0.1*ep_reward) else: ma_rewards.append(ep_reward) - print('Complete evaling!') + print(f"回合:{i_ep+1}/{cfg.test_eps},奖励:{ep_reward:.1f}") + print('完成测试!') return rewards, ma_rewards if __name__ == "__main__": cfg=SACConfig() plot_cfg = PlotConfig() - # train - env,agent = env_agent_config(cfg,seed=1) + # 训练 + env, agent = env_agent_config(cfg, seed=1) rewards, ma_rewards = train(cfg, env, agent) - make_dir(plot_cfg.result_path, plot_cfg.model_path) - agent.save(path=plot_cfg.model_path) - save_results(rewards, ma_rewards, tag='train', path=plot_cfg.result_path) - plot_rewards(rewards, ma_rewards, plot_cfg, tag="train") - # eval - env,agent = env_agent_config(cfg,seed=10) - agent.load(path=plot_cfg.model_path) - rewards,ma_rewards = eval(cfg,env,agent) - save_results(rewards,ma_rewards,tag='eval',path=plot_cfg.result_path) - plot_rewards(rewards,ma_rewards,plot_cfg,tag="eval") + make_dir(plot_cfg.result_path, plot_cfg.model_path) # 创建保存结果和模型路径的文件夹 + agent.save(path=plot_cfg.model_path) # 保存模型 + save_results(rewards, ma_rewards, tag='train', + path=plot_cfg.result_path) # 保存结果 + plot_rewards(rewards, ma_rewards, plot_cfg, tag="train") # 画出结果 + # 测试 + env, agent = env_agent_config(cfg, seed=10) + agent.load(path=plot_cfg.model_path) # 导入模型 + rewards, ma_rewards = test(cfg, env, agent) + save_results(rewards, ma_rewards, tag='test', path=plot_cfg.result_path) # 保存结果 + plot_rewards(rewards, ma_rewards, plot_cfg, tag="test") # 画出结果 diff --git a/codes/SAC/task0_train.ipynb b/codes/SoftActorCritic/task0_train.ipynb similarity index 94% rename from codes/SAC/task0_train.ipynb rename to codes/SoftActorCritic/task0_train.ipynb index 14be84e..3be10c6 100644 --- a/codes/SAC/task0_train.ipynb +++ b/codes/SoftActorCritic/task0_train.ipynb @@ -70,9 +70,9 @@ "def env_agent_config(cfg,seed=1):\n", " env = NormalizedActions(gym.make(\"Pendulum-v0\"))\n", " env.seed(seed)\n", - " action_dim = env.action_space.shape[0]\n", - " state_dim = env.observation_space.shape[0]\n", - " agent = SAC(state_dim,action_dim,cfg)\n", + " n_actions = env.action_space.shape[0]\n", + " n_states = env.observation_space.shape[0]\n", + " agent = SAC(n_states,n_actions,cfg)\n", " return env,agent" ] }, @@ -159,7 +159,7 @@ "\nDuring handling of the above exception, another exception occurred:\n", "\u001b[0;31mDeprecatedEnv\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;31m# 
train ... [the remainder of this hunk is the notebook's stored Jupyter traceback, saved with ANSI escape codes as literal \u001b sequences: gym raises DeprecatedEnv because "Pendulum-v0" is no longer registered in the installed gym, and the only substantive change inside the escaped output is the same action_dim/state_dim -> n_actions/n_states rename as in the code cell above] diff --git a/codes/TD3/memory.py 
b/codes/TD3/memory.py index 7e2671c..bcf38bb 100644 --- a/codes/TD3/memory.py +++ b/codes/TD3/memory.py @@ -14,13 +14,13 @@ import torch class ReplayBuffer(object): - def __init__(self, state_dim, action_dim, max_size=int(1e6)): + def __init__(self, n_states, n_actions, max_size=int(1e6)): self.max_size = max_size self.ptr = 0 self.size = 0 - self.state = np.zeros((max_size, state_dim)) - self.action = np.zeros((max_size, action_dim)) - self.next_state = np.zeros((max_size, state_dim)) + self.state = np.zeros((max_size, n_states)) + self.action = np.zeros((max_size, n_actions)) + self.next_state = np.zeros((max_size, n_states)) self.reward = np.zeros((max_size, 1)) self.not_done = np.zeros((max_size, 1)) self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") diff --git a/codes/TD3/task0_eval.py b/codes/TD3/task0_eval.py index 0420dce..cb977b4 100644 --- a/codes/TD3/task0_eval.py +++ b/codes/TD3/task0_eval.py @@ -74,10 +74,10 @@ if __name__ == "__main__": env.seed(cfg.seed) # Set seeds torch.manual_seed(cfg.seed) np.random.seed(cfg.seed) - state_dim = env.observation_space.shape[0] - action_dim = env.action_space.shape[0] + n_states = env.observation_space.shape[0] + n_actions = env.action_space.shape[0] max_action = float(env.action_space.high[0]) - td3= TD3(state_dim,action_dim,max_action,cfg) + td3= TD3(n_states,n_actions,max_action,cfg) cfg.model_path = './TD3/results/HalfCheetah-v2/20210416-130341/models/' td3.load(cfg.model_path) td3_rewards,td3_ma_rewards = eval(cfg.env,td3,cfg.seed) diff --git a/codes/TD3/task0_train.py b/codes/TD3/task0_train.py index 11e2adf..58e4af9 100644 --- a/codes/TD3/task0_train.py +++ b/codes/TD3/task0_train.py @@ -72,7 +72,7 @@ def train(cfg,env,agent): else: action = ( agent.choose_action(np.array(state)) - + np.random.normal(0, max_action * cfg.expl_noise, size=action_dim) + + np.random.normal(0, max_action * cfg.expl_noise, size=n_actions) ).clip(-max_action, max_action) # Perform action next_state, reward, done, _ = env.step(action) @@ -121,11 +121,11 @@ def train(cfg,env,agent): # else: # action = ( # agent.choose_action(np.array(state)) -# + np.random.normal(0, max_action * cfg.expl_noise, size=action_dim) +# + np.random.normal(0, max_action * cfg.expl_noise, size=n_actions) # ).clip(-max_action, max_action) # # action = ( # # agent.choose_action(np.array(state)) -# # + np.random.normal(0, max_action * cfg.expl_noise, size=action_dim) +# # + np.random.normal(0, max_action * cfg.expl_noise, size=n_actions) # # ).clip(-max_action, max_action) # # Perform action # next_state, reward, done, _ = env.step(action) @@ -157,10 +157,10 @@ if __name__ == "__main__": env.seed(cfg.seed) # Set seeds torch.manual_seed(cfg.seed) np.random.seed(cfg.seed) - state_dim = env.observation_space.shape[0] - action_dim = env.action_space.shape[0] + n_states = env.observation_space.shape[0] + n_actions = env.action_space.shape[0] max_action = float(env.action_space.high[0]) - agent = TD3(state_dim,action_dim,max_action,cfg) + agent = TD3(n_states,n_actions,max_action,cfg) rewards,ma_rewards = train(cfg,env,agent) make_dir(cfg.result_path,cfg.model_path) agent.save(path=cfg.model_path) diff --git a/codes/TD3/task1_eval.py b/codes/TD3/task1_eval.py index ae17681..0d28c48 100644 --- a/codes/TD3/task1_eval.py +++ b/codes/TD3/task1_eval.py @@ -70,10 +70,10 @@ if __name__ == "__main__": env.seed(cfg.seed) # Set seeds torch.manual_seed(cfg.seed) np.random.seed(cfg.seed) - state_dim = env.observation_space.shape[0] - action_dim = env.action_space.shape[0] + 
n_states = env.observation_space.shape[0] + n_actions = env.action_space.shape[0] max_action = float(env.action_space.high[0]) - td3= TD3(state_dim,action_dim,max_action,cfg) + td3= TD3(n_states,n_actions,max_action,cfg) cfg.model_path = './TD3/results/Pendulum-v0/20210428-092059/models/' cfg.result_path = './TD3/results/Pendulum-v0/20210428-092059/results/' td3.load(cfg.model_path) diff --git a/codes/TD3/task1_train.py b/codes/TD3/task1_train.py index 9780f76..868f686 100644 --- a/codes/TD3/task1_train.py +++ b/codes/TD3/task1_train.py @@ -79,7 +79,7 @@ def train(cfg,env,agent): else: action = ( agent.choose_action(np.array(state)) - + np.random.normal(0, max_action * cfg.expl_noise, size=action_dim) + + np.random.normal(0, max_action * cfg.expl_noise, size=n_actions) ).clip(-max_action, max_action) # Perform action next_state, reward, done, _ = env.step(action) @@ -109,10 +109,10 @@ if __name__ == "__main__": env.seed(1) # 随机种子 torch.manual_seed(1) np.random.seed(1) - state_dim = env.observation_space.shape[0] - action_dim = env.action_space.shape[0] + n_states = env.observation_space.shape[0] + n_actions = env.action_space.shape[0] max_action = float(env.action_space.high[0]) - agent = TD3(state_dim,action_dim,max_action,cfg) + agent = TD3(n_states,n_actions,max_action,cfg) rewards,ma_rewards = train(cfg,env,agent) make_dir(plot_cfg.result_path,plot_cfg.model_path) agent.save(path=plot_cfg.model_path) diff --git a/codes/common/atari_wrappers.py b/codes/common/atari_wrappers.py new file mode 100644 index 0000000..48dab94 --- /dev/null +++ b/codes/common/atari_wrappers.py @@ -0,0 +1,284 @@ +import numpy as np +import os +os.environ.setdefault('PATH', '') +from collections import deque +import gym +from gym import spaces +import cv2 +cv2.ocl.setUseOpenCL(False) +from .wrappers import TimeLimit + + +class NoopResetEnv(gym.Wrapper): + def __init__(self, env, noop_max=30): + """Sample initial states by taking random number of no-ops on reset. + No-op is assumed to be action 0. + """ + gym.Wrapper.__init__(self, env) + self.noop_max = noop_max + self.override_num_noops = None + self.noop_action = 0 + assert env.unwrapped.get_action_meanings()[0] == 'NOOP' + + def reset(self, **kwargs): + """ Do no-op action for a number of steps in [1, noop_max].""" + self.env.reset(**kwargs) + if self.override_num_noops is not None: + noops = self.override_num_noops + else: + noops = self.unwrapped.np_random.randint(1, self.noop_max + 1) #pylint: disable=E1101 + assert noops > 0 + obs = None + for _ in range(noops): + obs, _, done, _ = self.env.step(self.noop_action) + if done: + obs = self.env.reset(**kwargs) + return obs + + def step(self, ac): + return self.env.step(ac) + +class FireResetEnv(gym.Wrapper): + def __init__(self, env): + """Take action on reset for environments that are fixed until firing.""" + gym.Wrapper.__init__(self, env) + assert env.unwrapped.get_action_meanings()[1] == 'FIRE' + assert len(env.unwrapped.get_action_meanings()) >= 3 + + def reset(self, **kwargs): + self.env.reset(**kwargs) + obs, _, done, _ = self.env.step(1) + if done: + self.env.reset(**kwargs) + obs, _, done, _ = self.env.step(2) + if done: + self.env.reset(**kwargs) + return obs + + def step(self, ac): + return self.env.step(ac) + +class EpisodicLifeEnv(gym.Wrapper): + def __init__(self, env): + """Make end-of-life == end-of-episode, but only reset on true game over. + Done by DeepMind for the DQN and co. since it helps value estimation. 
+ """ + gym.Wrapper.__init__(self, env) + self.lives = 0 + self.was_real_done = True + + def step(self, action): + obs, reward, done, info = self.env.step(action) + self.was_real_done = done + # check current lives, make loss of life terminal, + # then update lives to handle bonus lives + lives = self.env.unwrapped.ale.lives() + if lives < self.lives and lives > 0: + # for Qbert sometimes we stay in lives == 0 condition for a few frames + # so it's important to keep lives > 0, so that we only reset once + # the environment advertises done. + done = True + self.lives = lives + return obs, reward, done, info + + def reset(self, **kwargs): + """Reset only when lives are exhausted. + This way all states are still reachable even though lives are episodic, + and the learner need not know about any of this behind-the-scenes. + """ + if self.was_real_done: + obs = self.env.reset(**kwargs) + else: + # no-op step to advance from terminal/lost life state + obs, _, _, _ = self.env.step(0) + self.lives = self.env.unwrapped.ale.lives() + return obs + +class MaxAndSkipEnv(gym.Wrapper): + def __init__(self, env, skip=4): + """Return only every `skip`-th frame""" + gym.Wrapper.__init__(self, env) + # most recent raw observations (for max pooling across time steps) + self._obs_buffer = np.zeros((2,)+env.observation_space.shape, dtype=np.uint8) + self._skip = skip + + def step(self, action): + """Repeat action, sum reward, and max over last observations.""" + total_reward = 0.0 + done = None + for i in range(self._skip): + obs, reward, done, info = self.env.step(action) + if i == self._skip - 2: self._obs_buffer[0] = obs + if i == self._skip - 1: self._obs_buffer[1] = obs + total_reward += reward + if done: + break + # Note that the observation on the done=True frame + # doesn't matter + max_frame = self._obs_buffer.max(axis=0) + + return max_frame, total_reward, done, info + + def reset(self, **kwargs): + return self.env.reset(**kwargs) + +class ClipRewardEnv(gym.RewardWrapper): + def __init__(self, env): + gym.RewardWrapper.__init__(self, env) + + def reward(self, reward): + """Bin reward to {+1, 0, -1} by its sign.""" + return np.sign(reward) + + +class WarpFrame(gym.ObservationWrapper): + def __init__(self, env, width=84, height=84, grayscale=True, dict_space_key=None): + """ + Warp frames to 84x84 as done in the Nature paper and later work. + If the environment uses dictionary observations, `dict_space_key` can be specified which indicates which + observation should be warped. 
+ """ + super().__init__(env) + self._width = width + self._height = height + self._grayscale = grayscale + self._key = dict_space_key + if self._grayscale: + num_colors = 1 + else: + num_colors = 3 + + new_space = gym.spaces.Box( + low=0, + high=255, + shape=(self._height, self._width, num_colors), + dtype=np.uint8, + ) + if self._key is None: + original_space = self.observation_space + self.observation_space = new_space + else: + original_space = self.observation_space.spaces[self._key] + self.observation_space.spaces[self._key] = new_space + assert original_space.dtype == np.uint8 and len(original_space.shape) == 3 + + def observation(self, obs): + if self._key is None: + frame = obs + else: + frame = obs[self._key] + + if self._grayscale: + frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY) + frame = cv2.resize( + frame, (self._width, self._height), interpolation=cv2.INTER_AREA + ) + if self._grayscale: + frame = np.expand_dims(frame, -1) + + if self._key is None: + obs = frame + else: + obs = obs.copy() + obs[self._key] = frame + return obs + + +class FrameStack(gym.Wrapper): + def __init__(self, env, k): + """Stack k last frames. + Returns lazy array, which is much more memory efficient. + See Also + -------- + baselines.common.atari_wrappers.LazyFrames + """ + gym.Wrapper.__init__(self, env) + self.k = k + self.frames = deque([], maxlen=k) + shp = env.observation_space.shape + self.observation_space = spaces.Box(low=0, high=255, shape=(shp[:-1] + (shp[-1] * k,)), dtype=env.observation_space.dtype) + + def reset(self): + ob = self.env.reset() + for _ in range(self.k): + self.frames.append(ob) + return self._get_ob() + + def step(self, action): + ob, reward, done, info = self.env.step(action) + self.frames.append(ob) + return self._get_ob(), reward, done, info + + def _get_ob(self): + assert len(self.frames) == self.k + return LazyFrames(list(self.frames)) + +class ScaledFloatFrame(gym.ObservationWrapper): + def __init__(self, env): + gym.ObservationWrapper.__init__(self, env) + self.observation_space = gym.spaces.Box(low=0, high=1, shape=env.observation_space.shape, dtype=np.float32) + + def observation(self, observation): + # careful! This undoes the memory optimization, use + # with smaller replay buffers only. + return np.array(observation).astype(np.float32) / 255.0 + +class LazyFrames(object): + def __init__(self, frames): + """This object ensures that common frames between the observations are only stored once. + It exists purely to optimize memory usage which can be huge for DQN's 1M frames replay + buffers. + This object should only be converted to numpy array before being passed to the model. 
+ You'd not believe how complex the previous solution was.""" + self._frames = frames + self._out = None + + def _force(self): + if self._out is None: + self._out = np.concatenate(self._frames, axis=-1) + self._frames = None + return self._out + + def __array__(self, dtype=None): + out = self._force() + if dtype is not None: + out = out.astype(dtype) + return out + + def __len__(self): + return len(self._force()) + + def __getitem__(self, i): + return self._force()[i] + + def count(self): + frames = self._force() + return frames.shape[frames.ndim - 1] + + def frame(self, i): + return self._force()[..., i] + +def make_atari(env_id, max_episode_steps=None): + env = gym.make(env_id) + assert 'NoFrameskip' in env.spec.id + env = NoopResetEnv(env, noop_max=30) + env = MaxAndSkipEnv(env, skip=4) + if max_episode_steps is not None: + env = TimeLimit(env, max_episode_steps=max_episode_steps) + return env + +def wrap_deepmind(env, episode_life=True, clip_rewards=True, frame_stack=False, scale=False): + """Configure environment for DeepMind-style Atari. + """ + if episode_life: + env = EpisodicLifeEnv(env) + if 'FIRE' in env.unwrapped.get_action_meanings(): + env = FireResetEnv(env) + env = WarpFrame(env) + if scale: + env = ScaledFloatFrame(env) + if clip_rewards: + env = ClipRewardEnv(env) + if frame_stack: + env = FrameStack(env, 4) + return env \ No newline at end of file diff --git a/codes/common/model.py b/codes/common/model.py index 27e5e4e..1518df0 100644 --- a/codes/common/model.py +++ b/codes/common/model.py @@ -32,10 +32,10 @@ class MLP(nn.Module): return self.fc3(x) class Critic(nn.Module): - def __init__(self, n_obs, action_dim, hidden_size, init_w=3e-3): + def __init__(self, n_obs, n_actions, hidden_size, init_w=3e-3): super(Critic, self).__init__() - self.linear1 = nn.Linear(n_obs + action_dim, hidden_size) + self.linear1 = nn.Linear(n_obs + n_actions, hidden_size) self.linear2 = nn.Linear(hidden_size, hidden_size) self.linear3 = nn.Linear(hidden_size, 1) # 随机初始化为较小的值 @@ -51,11 +51,11 @@ class Critic(nn.Module): return x class Actor(nn.Module): - def __init__(self, n_obs, action_dim, hidden_size, init_w=3e-3): + def __init__(self, n_obs, n_actions, hidden_size, init_w=3e-3): super(Actor, self).__init__() self.linear1 = nn.Linear(n_obs, hidden_size) self.linear2 = nn.Linear(hidden_size, hidden_size) - self.linear3 = nn.Linear(hidden_size, action_dim) + self.linear3 = nn.Linear(hidden_size, n_actions) self.linear3.weight.data.uniform_(-init_w, init_w) self.linear3.bias.data.uniform_(-init_w, init_w) @@ -67,18 +67,18 @@ class Actor(nn.Module): return x class ActorCritic(nn.Module): - def __init__(self, state_dim, action_dim, hidden_dim=256): + def __init__(self, n_states, n_actions, hidden_dim=256): super(ActorCritic, self).__init__() self.critic = nn.Sequential( - nn.Linear(state_dim, hidden_dim), + nn.Linear(n_states, hidden_dim), nn.ReLU(), nn.Linear(hidden_dim, 1) ) self.actor = nn.Sequential( - nn.Linear(state_dim, hidden_dim), + nn.Linear(n_states, hidden_dim), nn.ReLU(), - nn.Linear(hidden_dim, action_dim), + nn.Linear(hidden_dim, n_actions), nn.Softmax(dim=1), ) diff --git a/codes/common/wrappers.py b/codes/common/wrappers.py new file mode 100644 index 0000000..4793b36 --- /dev/null +++ b/codes/common/wrappers.py @@ -0,0 +1,29 @@ +import gym + +class TimeLimit(gym.Wrapper): + def __init__(self, env, max_episode_steps=None): + super(TimeLimit, self).__init__(env) + self._max_episode_steps = max_episode_steps + self._elapsed_steps = 0 + + def step(self, ac): + observation, reward, 
done, info = self.env.step(ac) + self._elapsed_steps += 1 + if self._elapsed_steps >= self._max_episode_steps: + done = True + info['TimeLimit.truncated'] = True + return observation, reward, done, info + + def reset(self, **kwargs): + self._elapsed_steps = 0 + return self.env.reset(**kwargs) + +class ClipActionsWrapper(gym.Wrapper): + def step(self, action): + import numpy as np + action = np.nan_to_num(action) + action = np.clip(action, self.action_space.low, self.action_space.high) + return self.env.step(action) + + def reset(self, **kwargs): + return self.env.reset(**kwargs) \ No newline at end of file diff --git a/codes/envs/blackjack.py b/codes/envs/blackjack.py index 6946895..87f02d2 100644 --- a/codes/envs/blackjack.py +++ b/codes/envs/blackjack.py @@ -77,7 +77,7 @@ class BlackjackEnv(gym.Env): self.natural = natural # Start the first game self._reset() # Number of - self.action_dim = 2 + self.n_actions = 2 def reset(self): return self._reset() diff --git a/codes/envs/cliff_walking.py b/codes/envs/cliff_walking.py index 73e33c7..05b9b2e 100644 --- a/codes/envs/cliff_walking.py +++ b/codes/envs/cliff_walking.py @@ -31,7 +31,7 @@ class CliffWalkingEnv(discrete.DiscreteEnv): self.shape = (4, 12) nS = np.prod(self.shape) - action_dim = 4 + n_actions = 4 # Cliff Location self._cliff = np.zeros(self.shape, dtype=np.bool) @@ -41,7 +41,7 @@ class CliffWalkingEnv(discrete.DiscreteEnv): P = {} for s in range(nS): position = np.unravel_index(s, self.shape) - P[s] = { a : [] for a in range(action_dim) } + P[s] = { a : [] for a in range(n_actions) } P[s][UP] = self._calculate_transition_prob(position, [-1, 0]) P[s][RIGHT] = self._calculate_transition_prob(position, [0, 1]) P[s][DOWN] = self._calculate_transition_prob(position, [1, 0]) @@ -51,7 +51,7 @@ class CliffWalkingEnv(discrete.DiscreteEnv): isd = np.zeros(nS) isd[np.ravel_multi_index((3,0), self.shape)] = 1.0 - super(CliffWalkingEnv, self).__init__(nS, action_dim, P, isd) + super(CliffWalkingEnv, self).__init__(nS, n_actions, P, isd) def render(self, mode='human', close=False): self._render(mode, close) diff --git a/codes/envs/gridworld.py b/codes/envs/gridworld.py index c4fd512..cf3aec2 100644 --- a/codes/envs/gridworld.py +++ b/codes/envs/gridworld.py @@ -37,7 +37,7 @@ class GridworldEnv(discrete.DiscreteEnv): self.shape = shape nS = np.prod(shape) - action_dim = 4 + n_actions = 4 MAX_Y = shape[0] MAX_X = shape[1] @@ -51,7 +51,7 @@ class GridworldEnv(discrete.DiscreteEnv): y, x = it.multi_index # P[s][a] = (prob, next_state, reward, is_done) - P[s] = {a : [] for a in range(action_dim)} + P[s] = {a : [] for a in range(n_actions)} is_done = lambda s: s == 0 or s == (nS - 1) reward = 0.0 if is_done(s) else -1.0 @@ -82,7 +82,7 @@ class GridworldEnv(discrete.DiscreteEnv): # This should not be used in any model-free learning algorithm self.P = P - super(GridworldEnv, self).__init__(nS, action_dim, P, isd) + super(GridworldEnv, self).__init__(nS, n_actions, P, isd) def _render(self, mode='human', close=False): """ Renders the current gridworld layout diff --git a/codes/envs/stochastic_mdp.py b/codes/envs/stochastic_mdp.py index 5770fa5..3c1ad4d 100644 --- a/codes/envs/stochastic_mdp.py +++ b/codes/envs/stochastic_mdp.py @@ -17,31 +17,31 @@ class StochasticMDP: def __init__(self): self.end = False self.curr_state = 2 - self.action_dim = 2 - self.state_dim = 6 + self.n_actions = 2 + self.n_states = 6 self.p_right = 0.5 def reset(self): self.end = False self.curr_state = 2 - state = np.zeros(self.state_dim) + state = np.zeros(self.n_states) 
state[self.curr_state - 1] = 1. return state def step(self, action): if self.curr_state != 1: if action == 1: - if random.random() < self.p_right and self.curr_state < self.state_dim: + if random.random() < self.p_right and self.curr_state < self.n_states: self.curr_state += 1 else: self.curr_state -= 1 if action == 0: self.curr_state -= 1 - if self.curr_state == self.state_dim: + if self.curr_state == self.n_states: self.end = True - state = np.zeros(self.state_dim) + state = np.zeros(self.n_states) state[self.curr_state - 1] = 1. if self.curr_state == 1: diff --git a/codes/envs/windy_gridworld.py b/codes/envs/windy_gridworld.py index ac9c66a..2a9d4a4 100644 --- a/codes/envs/windy_gridworld.py +++ b/codes/envs/windy_gridworld.py @@ -30,7 +30,7 @@ class WindyGridworldEnv(discrete.DiscreteEnv): self.shape = (7, 10) nS = np.prod(self.shape) - action_dim = 4 + n_actions = 4 # Wind strength winds = np.zeros(self.shape) @@ -41,7 +41,7 @@ class WindyGridworldEnv(discrete.DiscreteEnv): P = {} for s in range(nS): position = np.unravel_index(s, self.shape) - P[s] = { a : [] for a in range(action_dim) } + P[s] = { a : [] for a in range(n_actions) } P[s][UP] = self._calculate_transition_prob(position, [-1, 0], winds) P[s][RIGHT] = self._calculate_transition_prob(position, [0, 1], winds) P[s][DOWN] = self._calculate_transition_prob(position, [1, 0], winds) @@ -51,7 +51,7 @@ class WindyGridworldEnv(discrete.DiscreteEnv): isd = np.zeros(nS) isd[np.ravel_multi_index((3,0), self.shape)] = 1.0 - super(WindyGridworldEnv, self).__init__(nS, action_dim, P, isd) + super(WindyGridworldEnv, self).__init__(nS, n_actions, P, isd) def render(self, mode='human', close=False): self._render(mode, close)
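
The hunks above move SAC into `codes/SoftActorCritic/` and rename its constructor arguments to `n_states`/`n_actions`, but the learning-rate fields that `SAC.__init__` reads (`value_lr`, `soft_q_lr`, `policy_lr`) are not visible in the truncated `SACConfig` hunk. A minimal smoke test of the relocated module could look like the sketch below; it is not part of the patch and assumes `codes/` is on `PYTHONPATH`, `Pendulum-v1` exists in the installed gym, and illustrative learning rates stand in for the repo's actual values.

```python
# Hypothetical smoke test for the relocated SAC agent (not taken from the repo).
import gym
import torch
from SoftActorCritic.env_wrapper import NormalizedActions
from SoftActorCritic.sac import SAC

class Cfg:
    """Only the fields that SAC.__init__ and SAC.update actually read."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    hidden_dim = 256
    batch_size = 128
    capacity = 1000000
    value_lr = soft_q_lr = policy_lr = 3e-4  # assumed values, not shown in this diff

env = NormalizedActions(gym.make("Pendulum-v1"))
n_states = env.observation_space.shape[0]   # 3 for Pendulum
n_actions = env.action_space.shape[0]       # 1 for Pendulum
agent = SAC(n_states, n_actions, Cfg())

state = env.reset()
for _ in range(500):
    action = agent.policy_net.get_action(state)   # tanh-squashed Gaussian sample
    next_state, reward, done, _ = env.step(action)
    agent.memory.push(state, action, reward, next_state, done)
    agent.update()                                 # no-op until a full batch is buffered
    state = env.reset() if done else next_state
```

Inside `update()`, the target value network tracks the online one through the soft update `target = (1 - soft_tau) * target + soft_tau * param` with `soft_tau=1e-2` by default, the same rule as in the old `codes/SAC/agent.py` that this patch deletes.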
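
The new `codes/common/atari_wrappers.py` is added without a call site in this patch. A minimal sketch of the intended DeepMind-style composition follows, assuming `gym[atari]` with ROMs and OpenCV are installed and using `PongNoFrameskip-v4` purely as an example id.

```python
# Hypothetical usage of the wrappers added in common/atari_wrappers.py.
import numpy as np
from common.atari_wrappers import make_atari, wrap_deepmind

env = make_atari("PongNoFrameskip-v4", max_episode_steps=10000)  # NoopReset + MaxAndSkip(skip=4) + TimeLimit
env = wrap_deepmind(env, episode_life=True, clip_rewards=True, frame_stack=True)
# episodic life, FIRE reset, 84x84 grayscale WarpFrame, sign-clipped rewards, 4-frame stack

obs = env.reset()            # LazyFrames holding four 84x84 grayscale frames
state = np.array(obs)        # materialise only when feeding a network; shape (84, 84, 4), dtype uint8
next_obs, reward, done, info = env.step(env.action_space.sample())
```

`FrameStack` returns `LazyFrames` rather than a concatenated array, so frames shared between consecutive observations are stored once in a replay buffer and only turned into a real ndarray (here via `np.array`) when passed to the model.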