From 621c81278d77de2184c95cd07573fb35a310f6e8 Mon Sep 17 00:00:00 2001
From: johnjim0816 <johnjim0816@gmail.com>
Date: Thu, 9 Jun 2022 19:06:37 +0800
Subject: [PATCH] DDPG: build batch tensors via np.array, use cfg.algo_name; add .gitignore and LICENSE

---
 codes/.gitignore    |  5 +++++
 codes/DDPG/ddpg.py  |  8 ++++----
 codes/DDPG/task0.py | 10 +++++-----
 codes/LICENSE       | 21 +++++++++++++++++++++
 4 files changed, 35 insertions(+), 9 deletions(-)
 create mode 100644 codes/.gitignore
 create mode 100644 codes/LICENSE

diff --git a/codes/.gitignore b/codes/.gitignore
new file mode 100644
index 0000000..764cbb7
--- /dev/null
+++ b/codes/.gitignore
@@ -0,0 +1,5 @@
+.DS_Store
+.ipynb_checkpoints
+__pycache__
+.vscode
+test.py
\ No newline at end of file
diff --git a/codes/DDPG/ddpg.py b/codes/DDPG/ddpg.py
index 01ded1c..4d2ed42 100644
--- a/codes/DDPG/ddpg.py
+++ b/codes/DDPG/ddpg.py
@@ -5,7 +5,7 @@
 @Email: johnjim0816@gmail.com
 @Date: 2020-06-09 20:25:52
 @LastEditor: John
-LastEditTime: 2021-09-16 00:55:30
+LastEditTime: 2022-06-09 19:04:44
 @Discription: 
 @Environment: python 3.7.7
 '''
@@ -104,9 +104,9 @@ class DDPG:
         # 从经验回放中(replay memory)中随机采样一个批量的转移(transition)
         state, action, reward, next_state, done = self.memory.sample(self.batch_size)
         # 转变为张量
-        state = torch.FloatTensor(state).to(self.device)
-        next_state = torch.FloatTensor(next_state).to(self.device)
-        action = torch.FloatTensor(action).to(self.device)
+        state = torch.FloatTensor(np.array(state)).to(self.device)
+        next_state = torch.FloatTensor(np.array(next_state)).to(self.device)
+        action = torch.FloatTensor(np.array(action)).to(self.device)
         reward = torch.FloatTensor(reward).unsqueeze(1).to(self.device)
         done = torch.FloatTensor(np.float32(done)).unsqueeze(1).to(self.device)
        
diff --git a/codes/DDPG/task0.py b/codes/DDPG/task0.py
index 04da4a9..f3d5bc2 100644
--- a/codes/DDPG/task0.py
+++ b/codes/DDPG/task0.py
@@ -5,11 +5,12 @@
 @Email: johnjim0816@gmail.com
 @Date: 2020-06-11 20:58:21
 @LastEditor: John
-LastEditTime: 2022-02-10 06:23:27
+LastEditTime: 2022-06-09 19:05:20
 @Discription: 
 @Environment: python 3.7.7
 '''
 import sys,os
+os.environ['KMP_DUPLICATE_LIB_OK']='True'
 curr_path = os.path.dirname(os.path.abspath(__file__)) # 当前文件所在绝对路径
 parent_path = os.path.dirname(curr_path) # 父路径
 sys.path.append(parent_path) # 添加路径到系统路径sys.path
@@ -20,7 +21,6 @@ import torch
 
 from env import NormalizedActions,OUNoise
 from ddpg import DDPG
-from DDPG.train import train,test
 from common.utils import save_results,make_dir
 from common.utils import plot_rewards
 
@@ -37,7 +37,7 @@ class Config:
             "cuda" if torch.cuda.is_available() else "cpu")  # 检测GPUgjgjlkhfsf风刀霜的撒发十
         self.seed = 10 # 随机种子，置0则不设置随机种子
         self.train_eps = 300 # 训练的回合数
-        self.test_eps = 50 # 测试的回合数
+        self.test_eps = 20 # 测试的回合数
         ################################################################################
         
         ################################## 算法超参数 ###################################
@@ -68,7 +68,7 @@ def env_agent_config(cfg,seed=1):
     return env,agent
 def train(cfg, env, agent):
     print('开始训练！')
-    print(f'环境：{cfg.env_name}，算法：{cfg.algo}，设备：{cfg.device}')
+    print(f'环境：{cfg.env_name}，算法：{cfg.algo_name}，设备：{cfg.device}')
     ou_noise = OUNoise(env.action_space)  # 动作噪声
     rewards = [] # 记录所有回合的奖励
     ma_rewards = []  # 记录所有回合的滑动平均奖励
@@ -99,7 +99,7 @@ def train(cfg, env, agent):
 
 def test(cfg, env, agent):
     print('开始测试！')
-    print(f'环境：{cfg.env_name}, 算法：{cfg.algo}, 设备：{cfg.device}')
+    print(f'环境：{cfg.env_name}, 算法：{cfg.algo_name}, 设备：{cfg.device}')
     rewards = [] # 记录所有回合的奖励
     ma_rewards = []  # 记录所有回合的滑动平均奖励
     for i_ep in range(cfg.test_eps):
diff --git a/codes/LICENSE b/codes/LICENSE
new file mode 100644
index 0000000..673d927
--- /dev/null
+++ b/codes/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2020 John Jim
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.