update
This commit is contained in:
@@ -5,7 +5,7 @@ Author: John
|
||||
Email: johnjim0816@gmail.com
|
||||
Date: 2020-09-11 23:03:00
|
||||
LastEditor: John
|
||||
LastEditTime: 2021-09-11 21:53:18
|
||||
LastEditTime: 2021-09-15 13:18:37
|
||||
Discription: use defaultdict to define Q table
|
||||
Environment:
|
||||
'''
|
||||
@@ -26,7 +26,6 @@ class QLearning(object):
|
||||
self.epsilon_end = cfg.epsilon_end
|
||||
self.epsilon_decay = cfg.epsilon_decay
|
||||
self.Q_table = defaultdict(lambda: np.zeros(action_dim)) # A nested dictionary that maps state -> (action -> action-value)
|
||||
|
||||
def choose_action(self, state):
|
||||
self.sample_count += 1
|
||||
self.epsilon = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * \
|
||||
|
||||
@@ -5,7 +5,7 @@ Author: John
|
||||
Email: johnjim0816@gmail.com
|
||||
Date: 2020-09-11 23:03:00
|
||||
LastEditor: John
|
||||
LastEditTime: 2021-09-12 01:29:40
|
||||
LastEditTime: 2021-09-15 14:44:25
|
||||
Discription:
|
||||
Environment:
|
||||
'''
|
||||
@@ -57,11 +57,11 @@ def train(cfg,env,agent):
|
||||
ma_rewards = [] # 滑动平均奖励
|
||||
for i_ep in range(cfg.train_eps):
|
||||
ep_reward = 0 # 记录每个回合的奖励
|
||||
state = env.reset() # 重置环境, 重新开一局(即开始新的一个episode)
|
||||
state = env.reset() # 重置环境,即开始新的回合
|
||||
while True:
|
||||
action = agent.choose_action(state) # 根据算法选择一个动作
|
||||
next_state, reward, done, _ = env.step(action) # 与环境进行一次动作交互
|
||||
agent.update(state, action, reward, next_state, done) # Q-learning算法更新
|
||||
agent.update(state, action, reward, next_state, done) # Q学习算法更新
|
||||
state = next_state # 更新状态
|
||||
ep_reward += reward
|
||||
if done:
|
||||
|
||||
Reference in New Issue
Block a user