diff --git a/docs/chapter3/project1.md b/docs/chapter3/project1.md
index d1c881a..9d46bb0 100644
--- a/docs/chapter3/project1.md
+++ b/docs/chapter3/project1.md
@@ -34,7 +34,7 @@ agent = QLearning(
     state_dim=env.observation_space.n,
     action_dim=env.action_space.n,
     learning_rate=cfg.policy_lr,
-    gamma=cfg.gamma,
+    gamma=cfg.gamma,)
 rewards = []
 ma_rewards = [] # moving average reward
 for i_ep in range(cfg.train_eps): # train_eps: 训练的最大episodes数