update ch1

Author: qiwang067
Date: 2023-07-21 21:04:30 +08:00
Parent: 030185ac6d
Commit: ded3228a12

docs/chapter1/RL_example.py (executable file, 54 additions)

@@ -0,0 +1,54 @@
import gym
import numpy as np


class BespokeAgent:
    def __init__(self, env):
        pass

    def decide(self, observation):  # decide on an action
        position, velocity = observation
        lb = min(-0.09 * (position + 0.25) ** 2 + 0.03,
                 0.3 * (position + 0.9) ** 4 - 0.008)
        ub = -0.07 * (position + 0.38) ** 2 + 0.07
        if lb < velocity < ub:
            action = 2
        else:
            action = 0
        return action  # return the chosen action

    def learn(self, *args):  # learn
        pass


def play_montecarlo(env, agent, render=False, train=False):
    episode_reward = 0.  # total episode reward, initialized to 0
    observation = env.reset()  # reset the environment to start a new episode
    while True:  # loop until the episode ends
        if render:  # whether to render
            env.render()  # show the GUI; it can be closed with env.close()
        action = agent.decide(observation)
        next_observation, reward, done, _ = env.step(action)  # take the action
        episode_reward += reward  # accumulate the episode reward
        if train:  # whether to train the agent
            agent.learn(observation, action, reward, done)  # learn
        if done:  # episode over, exit the loop
            break
        observation = next_observation
    return episode_reward  # return the total episode reward


env = gym.make('MountainCar-v0')
env.seed(3)  # set the random seed so results are exactly reproducible; can usually be omitted
agent = BespokeAgent(env)
print('observation space = {}'.format(env.observation_space))
print('action space = {}'.format(env.action_space))
print('observation range = {} ~ {}'.format(env.observation_space.low,
                                           env.observation_space.high))
print('number of actions = {}'.format(env.action_space.n))
episode_reward = play_montecarlo(env, agent)
print('episode reward = {}'.format(episode_reward))
episode_rewards = [play_montecarlo(env, agent) for _ in range(100)]
print('average episode reward = {}'.format(np.mean(episode_rewards)))
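Note: the script above targets the pre-0.26 Gym API, where env.seed() exists, env.reset() returns only the observation, and env.step() returns a 4-tuple. For readers on gym>=0.26 or Gymnasium, a minimal sketch of the same rollout loop follows; the helper name play_montecarlo_v26 is ours for illustration, and it assumes BespokeAgent is defined as above.

import gym

# Sketch, assuming the gym>=0.26 / Gymnasium API: reset() returns
# (observation, info), seeding moves into reset(), and step() returns a
# 5-tuple whose terminated/truncated flags replace the single done flag.
def play_montecarlo_v26(env, agent, render=False, train=False):
    episode_reward = 0.
    observation, _ = env.reset(seed=3)  # seed passed to reset() in the new API
    while True:
        if render:
            env.render()  # requires render_mode='human' in gym.make()
        action = agent.decide(observation)
        next_observation, reward, terminated, truncated, _ = env.step(action)
        episode_reward += reward
        done = terminated or truncated  # recombine into a single done flag
        if train:
            agent.learn(observation, action, reward, done)
        if done:
            break
        observation = next_observation
    return episode_reward

env = gym.make('MountainCar-v0')
agent = BespokeAgent(env)
print('episode reward = {}'.format(play_montecarlo_v26(env, agent)))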