From 621c81278d77de2184c95cd07573fb35a310f6e8 Mon Sep 17 00:00:00 2001 From: johnjim0816 Date: Thu, 9 Jun 2022 19:06:37 +0800 Subject: [PATCH] update --- codes/.gitignore | 5 +++++ codes/DDPG/ddpg.py | 8 ++++---- codes/DDPG/task0.py | 10 +++++----- codes/LICENSE | 21 +++++++++++++++++++++ 4 files changed, 35 insertions(+), 9 deletions(-) create mode 100644 codes/.gitignore create mode 100644 codes/LICENSE diff --git a/codes/.gitignore b/codes/.gitignore new file mode 100644 index 0000000..764cbb7 --- /dev/null +++ b/codes/.gitignore @@ -0,0 +1,5 @@ +.DS_Store +.ipynb_checkpoints +__pycache__ +.vscode +test.py \ No newline at end of file diff --git a/codes/DDPG/ddpg.py b/codes/DDPG/ddpg.py index 01ded1c..4d2ed42 100644 --- a/codes/DDPG/ddpg.py +++ b/codes/DDPG/ddpg.py @@ -5,7 +5,7 @@ @Email: johnjim0816@gmail.com @Date: 2020-06-09 20:25:52 @LastEditor: John -LastEditTime: 2021-09-16 00:55:30 +LastEditTime: 2022-06-09 19:04:44 @Discription: @Environment: python 3.7.7 ''' @@ -104,9 +104,9 @@ class DDPG: # 从经验回放中(replay memory)中随机采样一个批量的转移(transition) state, action, reward, next_state, done = self.memory.sample(self.batch_size) # 转变为张量 - state = torch.FloatTensor(state).to(self.device) - next_state = torch.FloatTensor(next_state).to(self.device) - action = torch.FloatTensor(action).to(self.device) + state = torch.FloatTensor(np.array(state)).to(self.device) + next_state = torch.FloatTensor(np.array(next_state)).to(self.device) + action = torch.FloatTensor(np.array(action)).to(self.device) reward = torch.FloatTensor(reward).unsqueeze(1).to(self.device) done = torch.FloatTensor(np.float32(done)).unsqueeze(1).to(self.device) diff --git a/codes/DDPG/task0.py b/codes/DDPG/task0.py index 04da4a9..f3d5bc2 100644 --- a/codes/DDPG/task0.py +++ b/codes/DDPG/task0.py @@ -5,11 +5,12 @@ @Email: johnjim0816@gmail.com @Date: 2020-06-11 20:58:21 @LastEditor: John -LastEditTime: 2022-02-10 06:23:27 +LastEditTime: 2022-06-09 19:05:20 @Discription: @Environment: python 3.7.7 ''' import sys,os +os.environ['KMP_DUPLICATE_LIB_OK']='True' curr_path = os.path.dirname(os.path.abspath(__file__)) # 当前文件所在绝对路径 parent_path = os.path.dirname(curr_path) # 父路径 sys.path.append(parent_path) # 添加路径到系统路径sys.path @@ -20,7 +21,6 @@ import torch from env import NormalizedActions,OUNoise from ddpg import DDPG -from DDPG.train import train,test from common.utils import save_results,make_dir from common.utils import plot_rewards @@ -37,7 +37,7 @@ class Config: "cuda" if torch.cuda.is_available() else "cpu") # 检测GPUgjgjlkhfsf风刀霜的撒发十 self.seed = 10 # 随机种子,置0则不设置随机种子 self.train_eps = 300 # 训练的回合数 - self.test_eps = 50 # 测试的回合数 + self.test_eps = 20 # 测试的回合数 ################################################################################ ################################## 算法超参数 ################################### @@ -68,7 +68,7 @@ def env_agent_config(cfg,seed=1): return env,agent def train(cfg, env, agent): print('开始训练!') - print(f'环境:{cfg.env_name},算法:{cfg.algo},设备:{cfg.device}') + print(f'环境:{cfg.env_name},算法:{cfg.algo_name},设备:{cfg.device}') ou_noise = OUNoise(env.action_space) # 动作噪声 rewards = [] # 记录所有回合的奖励 ma_rewards = [] # 记录所有回合的滑动平均奖励 @@ -99,7 +99,7 @@ def train(cfg, env, agent): def test(cfg, env, agent): print('开始测试!') - print(f'环境:{cfg.env_name}, 算法:{cfg.algo}, 设备:{cfg.device}') + print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}') rewards = [] # 记录所有回合的奖励 ma_rewards = [] # 记录所有回合的滑动平均奖励 for i_ep in range(cfg.test_eps): diff --git a/codes/LICENSE b/codes/LICENSE new file mode 100644 index 0000000..673d927 --- /dev/null +++ b/codes/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 John Jim + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE.