update

2021-09-16 15:35:40 +08:00
parent 5085040330
commit 34fcebc4b8
31 changed files with 434 additions and 137 deletions
--- a/codes/QLearning/agent.py
+++ b/codes/QLearning/agent.py
@@ -5,7 +5,7 @@ Author: John
 Email: johnjim0816@gmail.com
 Date: 2020-09-11 23:03:00
 LastEditor: John
-LastEditTime: 2021-09-11 21:53:18
+LastEditTime: 2021-09-15 13:18:37
 Discription: use defaultdict to define Q table
 Environment: 
 '''
@@ -26,7 +26,6 @@ class QLearning(object):
        self.epsilon_end = cfg.epsilon_end
        self.epsilon_decay = cfg.epsilon_decay
        self.Q_table  = defaultdict(lambda: np.zeros(action_dim)) # A nested dictionary that maps state -> (action -> action-value)
-        
    def choose_action(self, state):
        self.sample_count += 1
        self.epsilon = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * \
--- a/codes/QLearning/task0_train.py
+++ b/codes/QLearning/task0_train.py
@@ -5,7 +5,7 @@ Author: John
 Email: johnjim0816@gmail.com
 Date: 2020-09-11 23:03:00
 LastEditor: John
-LastEditTime: 2021-09-12 01:29:40
+LastEditTime: 2021-09-15 14:44:25
 Discription: 
 Environment: 
 '''
@@ -57,11 +57,11 @@ def train(cfg,env,agent):
    ma_rewards = [] # 滑动平均奖励
    for i_ep in range(cfg.train_eps):
        ep_reward = 0  # 记录每个回合的奖励
-        state = env.reset()  # 重置环境, 重新开一局（即开始新的一个episode）
+        state = env.reset()  # 重置环境,即开始新的回合
        while True:
            action = agent.choose_action(state)  # 根据算法选择一个动作
            next_state, reward, done, _ = env.step(action)  # 与环境进行一次动作交互
-            agent.update(state, action, reward, next_state, done)  # Q-learning算法更新
+            agent.update(state, action, reward, next_state, done)  # Q学习算法更新
            state = next_state  # 更新状态
            ep_reward += reward
            if done: