update projects
This commit is contained in:
284
projects/codes/common/atari_wrappers.py
Normal file
284
projects/codes/common/atari_wrappers.py
Normal file
@@ -0,0 +1,284 @@
|
||||
import numpy as np
|
||||
import os
|
||||
os.environ.setdefault('PATH', '')
|
||||
from collections import deque
|
||||
import gym
|
||||
from gym import spaces
|
||||
import cv2
|
||||
cv2.ocl.setUseOpenCL(False)
|
||||
from .wrappers import TimeLimit
|
||||
|
||||
|
||||
class NoopResetEnv(gym.Wrapper):
    """Sample initial states by taking a random number of no-ops on reset.

    The no-op is assumed to be action 0; the constructor asserts that the
    wrapped environment reports 'NOOP' as the meaning of that action.
    """

    def __init__(self, env, noop_max=30):
        """Wrap ``env``; at most ``noop_max`` no-ops are taken per reset."""
        super().__init__(env)
        self.noop_max = noop_max
        # When set to an int, this fixed count replaces the random draw.
        self.override_num_noops = None
        self.noop_action = 0
        assert env.unwrapped.get_action_meanings()[0] == 'NOOP'

    def reset(self, **kwargs):
        """Reset, then take a number of no-op steps in [1, noop_max].

        Returns the observation produced by the last no-op step, or by the
        extra reset if that step ended the episode.
        """
        self.env.reset(**kwargs)
        num_noops = self.override_num_noops
        if num_noops is None:
            num_noops = self.unwrapped.np_random.randint(1, self.noop_max + 1)  # pylint: disable=E1101
        assert num_noops > 0
        observation = None
        for _ in range(num_noops):
            observation, _, episode_over, _ = self.env.step(self.noop_action)
            if episode_over:
                # The game ended during the no-ops: start over.
                observation = self.env.reset(**kwargs)
        return observation

    def step(self, ac):
        """Forward the action to the wrapped environment unchanged."""
        return self.env.step(ac)
|
||||
|
||||
class FireResetEnv(gym.Wrapper):
    """Take action on reset for environments that are fixed until firing."""

    def __init__(self, env):
        """Wrap ``env``; asserts action 1 is 'FIRE' and at least 3 actions exist."""
        super().__init__(env)
        assert env.unwrapped.get_action_meanings()[1] == 'FIRE'
        assert len(env.unwrapped.get_action_meanings()) >= 3

    def reset(self, **kwargs):
        """Reset, then step with action 1 followed by action 2.

        If either step reports ``done`` the environment is reset again.
        The observation of the last step (action 2) is returned.
        """
        self.env.reset(**kwargs)
        observation = None
        for startup_action in (1, 2):
            observation, _, finished, _ = self.env.step(startup_action)
            if finished:
                self.env.reset(**kwargs)
        return observation

    def step(self, ac):
        """Forward the action to the wrapped environment unchanged."""
        return self.env.step(ac)
|
||||
|
||||
class EpisodicLifeEnv(gym.Wrapper):
    """Make end-of-life == end-of-episode, but only reset on true game over.

    Done by DeepMind for the DQN and co. since it helps value estimation.
    """

    def __init__(self, env):
        super().__init__(env)
        self.lives = 0
        # True when the wrapped env itself reported done on the last step.
        self.was_real_done = True

    def step(self, action):
        """Step the env and report ``done`` whenever a life is lost."""
        obs, reward, done, info = self.env.step(action)
        self.was_real_done = done
        remaining = self.env.unwrapped.ale.lives()
        # Requiring remaining > 0 matters: for Qbert the game can stay at
        # lives == 0 for a few frames, and we must only reset once the
        # environment itself advertises done.
        if 0 < remaining < self.lives:
            done = True
        # Stored after the comparison so bonus lives are handled too.
        self.lives = remaining
        return obs, reward, done, info

    def reset(self, **kwargs):
        """Reset only when lives are exhausted.

        This way all states are still reachable even though lives are
        episodic, and the learner need not know about any of this.
        """
        if not self.was_real_done:
            # No-op step to advance from the terminal/lost-life state.
            obs, _, _, _ = self.env.step(0)
        else:
            obs = self.env.reset(**kwargs)
        self.lives = self.env.unwrapped.ale.lives()
        return obs
|
||||
|
||||
class MaxAndSkipEnv(gym.Wrapper):
    """Return only every ``skip``-th frame, max-pooled over the last two."""

    def __init__(self, env, skip=4):
        super().__init__(env)
        # Most recent raw observations (for max pooling across time steps).
        buffer_shape = (2,) + env.observation_space.shape
        self._obs_buffer = np.zeros(buffer_shape, dtype=np.uint8)
        self._skip = skip

    def step(self, action):
        """Repeat action, sum reward, and max over last observations."""
        reward_sum = 0.0
        done = None
        second_to_last = self._skip - 2
        for frame_idx in range(self._skip):
            obs, reward, done, info = self.env.step(action)
            # Slot 0 takes the second-to-last frame, slot 1 the last one.
            slot = frame_idx - second_to_last
            if slot in (0, 1):
                self._obs_buffer[slot] = obs
            reward_sum += reward
            if done:
                break
        # Note that the observation on the done=True frame doesn't matter.
        pooled = self._obs_buffer.max(axis=0)
        return pooled, reward_sum, done, info

    def reset(self, **kwargs):
        """Forward the reset to the wrapped environment."""
        return self.env.reset(**kwargs)
|
||||
|
||||
class ClipRewardEnv(gym.RewardWrapper):
    """Reward wrapper that keeps only the sign of each reward."""

    def __init__(self, env):
        super().__init__(env)

    def reward(self, reward):
        """Bin reward to {+1, 0, -1} by its sign."""
        clipped = np.sign(reward)
        return clipped
|
||||
|
||||
|
||||
class WarpFrame(gym.ObservationWrapper):
    """Warp frames to 84x84 (by default) as done in the Nature paper and later work.

    If the environment uses dictionary observations, ``dict_space_key``
    selects which observation should be warped.
    """

    def __init__(self, env, width=84, height=84, grayscale=True, dict_space_key=None):
        super().__init__(env)
        self._width = width
        self._height = height
        self._grayscale = grayscale
        self._key = dict_space_key

        channels = 1 if self._grayscale else 3
        warped_space = gym.spaces.Box(
            low=0,
            high=255,
            shape=(self._height, self._width, channels),
            dtype=np.uint8,
        )

        if self._key is not None:
            # Replace only the selected entry of the dict observation space.
            source_space = self.observation_space.spaces[self._key]
            self.observation_space.spaces[self._key] = warped_space
        else:
            source_space = self.observation_space
            self.observation_space = warped_space
        assert source_space.dtype == np.uint8 and len(source_space.shape) == 3

    def observation(self, obs):
        """Return ``obs`` with the (selected) frame optionally grayscaled and resized."""
        keyed = self._key is not None
        frame = obs[self._key] if keyed else obs

        if self._grayscale:
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
        frame = cv2.resize(
            frame, (self._width, self._height), interpolation=cv2.INTER_AREA
        )
        if self._grayscale:
            # Append a trailing channel axis of size 1.
            frame = np.expand_dims(frame, -1)

        if not keyed:
            return frame
        # Work on a copy so the caller's observation is left untouched.
        warped = obs.copy()
        warped[self._key] = frame
        return warped
|
||||
|
||||
|
||||
class FrameStack(gym.Wrapper):
    """Stack the k last frames.

    Returns a lazy array (``LazyFrames``), which is much more memory
    efficient than concatenating the frames eagerly.
    """

    def __init__(self, env, k):
        """Wrap ``env`` so that observations are the last ``k`` frames."""
        gym.Wrapper.__init__(self, env)
        self.k = k
        self.frames = deque([], maxlen=k)
        shp = env.observation_space.shape
        # Frames are concatenated along the last axis, so that axis grows k-fold.
        self.observation_space = spaces.Box(
            low=0,
            high=255,
            shape=(shp[:-1] + (shp[-1] * k,)),
            dtype=env.observation_space.dtype,
        )

    def reset(self, **kwargs):
        """Reset the env and fill the stack with k copies of the first frame.

        Keyword arguments are forwarded to the wrapped environment, like the
        other wrappers in this module do.
        """
        ob = self.env.reset(**kwargs)
        for _ in range(self.k):
            self.frames.append(ob)
        return self._get_ob()

    def step(self, action):
        """Step the env, push the new frame and return the stacked observation."""
        ob, reward, done, info = self.env.step(action)
        self.frames.append(ob)
        return self._get_ob(), reward, done, info

    def _get_ob(self):
        """Return the current stack as a ``LazyFrames`` object."""
        assert len(self.frames) == self.k
        return LazyFrames(list(self.frames))
|
||||
|
||||
class ScaledFloatFrame(gym.ObservationWrapper):
    """Convert observations to float32 and divide them by 255."""

    def __init__(self, env):
        super().__init__(env)
        self.observation_space = gym.spaces.Box(
            low=0, high=1, shape=env.observation_space.shape, dtype=np.float32
        )

    def observation(self, observation):
        """Return the observation as a float32 array divided by 255."""
        # Careful! This undoes the memory optimization, use
        # with smaller replay buffers only.
        pixels = np.array(observation)
        return pixels.astype(np.float32) / 255.0
|
||||
|
||||
class LazyFrames(object):
    """Store stacked frames so that common frames are only kept once.

    This object exists purely to optimize memory usage, which can be huge
    for DQN's 1M-frame replay buffers. It should only be converted to a
    numpy array right before being passed to the model.
    """

    def __init__(self, frames):
        """Keep a reference to ``frames``; nothing is concatenated yet."""
        self._frames = frames
        self._out = None

    def _force(self):
        """Concatenate the frames along the last axis once and cache the result."""
        if self._out is None:
            self._out = np.concatenate(self._frames, axis=-1)
            # The individual frames are no longer needed once concatenated.
            self._frames = None
        return self._out

    def __array__(self, dtype=None, copy=None):
        """Return the concatenated frames as a numpy array.

        ``copy`` is the keyword NumPy 2 passes to ``__array__`` (for example
        from ``np.array(obj)``); accepting it avoids the deprecation warning
        and retry that NumPy performs for implementations without it.

        Raises:
            ValueError: if ``copy=False`` but a dtype conversion is required.
        """
        out = self._force()
        needs_cast = dtype is not None and np.dtype(dtype) != out.dtype
        if copy is False:
            if needs_cast:
                raise ValueError(
                    "a copy is required to convert LazyFrames to the requested dtype"
                )
            return out
        if dtype is not None:
            # astype always returns a new array, which also satisfies copy=True.
            return out.astype(dtype)
        return out.copy() if copy else out

    def __len__(self):
        """Length of the first axis of the concatenated array."""
        return len(self._force())

    def __getitem__(self, i):
        """Index into the concatenated array."""
        return self._force()[i]

    def count(self):
        """Size of the last axis of the concatenated array."""
        frames = self._force()
        return frames.shape[frames.ndim - 1]

    def frame(self, i):
        """Return the slice at index ``i`` of the last axis."""
        return self._force()[..., i]
|
||||
|
||||
def make_atari(env_id, max_episode_steps=None):
    """Create an Atari env wrapped with no-op resets and frame skipping.

    Asserts that the spec id of the created env contains 'NoFrameskip'.
    When ``max_episode_steps`` is given, the result is additionally wrapped
    in ``TimeLimit``.
    """
    base_env = gym.make(env_id)
    assert 'NoFrameskip' in base_env.spec.id
    wrapped = MaxAndSkipEnv(NoopResetEnv(base_env, noop_max=30), skip=4)
    if max_episode_steps is None:
        return wrapped
    return TimeLimit(wrapped, max_episode_steps=max_episode_steps)
|
||||
|
||||
def wrap_deepmind(env, episode_life=True, clip_rewards=True, frame_stack=False, scale=False):
    """Configure environment for DeepMind-style Atari.

    Wrappers are applied in this order, each one only when enabled:
    ``EpisodicLifeEnv``, ``FireResetEnv`` (if the env has a 'FIRE' action),
    ``WarpFrame`` (always), ``ScaledFloatFrame``, ``ClipRewardEnv`` and
    ``FrameStack`` with 4 frames.
    """
    if episode_life:
        env = EpisodicLifeEnv(env)

    action_meanings = env.unwrapped.get_action_meanings()
    if 'FIRE' in action_meanings:
        env = FireResetEnv(env)

    env = WarpFrame(env)

    optional_wrappers = (
        (scale, ScaledFloatFrame, ()),
        (clip_rewards, ClipRewardEnv, ()),
        (frame_stack, FrameStack, (4,)),
    )
    for enabled, wrapper_cls, extra_args in optional_wrappers:
        if enabled:
            env = wrapper_cls(env, *extra_args)
    return env
|
||||
36
projects/codes/common/memory.py
Normal file
36
projects/codes/common/memory.py
Normal file
@@ -0,0 +1,36 @@
|
||||
#!/usr/bin/env python
|
||||
# coding=utf-8
|
||||
'''
|
||||
@Author: John
|
||||
@Email: johnjim0816@gmail.com
|
||||
@Date: 2020-06-10 15:27:16
|
||||
@LastEditor: John
|
||||
LastEditTime: 2021-09-15 14:52:37
|
||||
@Description:
|
||||
@Environment: python 3.7.7
|
||||
'''
|
||||
import random
|
||||
class ReplayBuffer:
    """Fixed-capacity experience replay buffer that overwrites the oldest entry."""

    def __init__(self, capacity):
        self.capacity = capacity  # maximum number of stored transitions
        self.buffer = []  # storage for the transitions
        self.position = 0  # index the next transition is written to

    def push(self, state, action, reward, next_state, done):
        """Store one transition.

        The buffer behaves like a queue: once the capacity is reached, the
        transition that was stored first is overwritten.
        """
        transition = (state, action, reward, next_state, done)
        if len(self.buffer) < self.capacity:
            # Grow by one slot; it is filled in right below.
            self.buffer.append(None)
        self.buffer[self.position] = transition
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        """Randomly draw ``batch_size`` transitions and unzip them by field."""
        drawn = random.sample(self.buffer, batch_size)
        states, actions, rewards, next_states, dones = zip(*drawn)
        return states, actions, rewards, next_states, dones

    def __len__(self):
        """Return the number of transitions currently stored."""
        return len(self.buffer)
|
||||
|
||||
89
projects/codes/common/model.py
Normal file
89
projects/codes/common/model.py
Normal file
@@ -0,0 +1,89 @@
|
||||
#!/usr/bin/env python
|
||||
# coding=utf-8
|
||||
'''
|
||||
Author: John
|
||||
Email: johnjim0816@gmail.com
|
||||
Date: 2021-03-12 21:14:12
|
||||
LastEditor: John
|
||||
LastEditTime: 2021-09-15 13:21:03
|
||||
Description:
|
||||
Environment:
|
||||
'''
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from torch.distributions import Categorical
|
||||
|
||||
class MLP(nn.Module):
    """Fully connected Q-network with two hidden ReLU layers."""

    def __init__(self, input_dim, output_dim, hidden_dim=128):
        """Build the network.

        input_dim: number of input features, i.e. the state dimension
        output_dim: dimension of the output (actions)
        hidden_dim: width of both hidden layers
        """
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)  # input layer
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)  # hidden layer
        self.fc3 = nn.Linear(hidden_dim, output_dim)  # output layer

    def forward(self, x):
        """Apply ReLU after the first two layers and return the raw output layer."""
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)
|
||||
|
||||
class Critic(nn.Module):
    """Network mapping a (state, action) pair to a single value."""

    def __init__(self, n_obs, n_actions, hidden_size, init_w=3e-3):
        super().__init__()

        self.linear1 = nn.Linear(n_obs + n_actions, hidden_size)
        self.linear2 = nn.Linear(hidden_size, hidden_size)
        self.linear3 = nn.Linear(hidden_size, 1)
        # Randomly initialise the last layer to small values in [-init_w, init_w].
        for param in (self.linear3.weight, self.linear3.bias):
            param.data.uniform_(-init_w, init_w)

    def forward(self, state, action):
        """Concatenate state and action along dim 1 and run the three layers."""
        joined = torch.cat([state, action], 1)
        hidden = F.relu(self.linear1(joined))
        hidden = F.relu(self.linear2(hidden))
        return self.linear3(hidden)
|
||||
|
||||
class Actor(nn.Module):
    """Network mapping a state to a tanh-squashed action vector."""

    def __init__(self, n_obs, n_actions, hidden_size, init_w=3e-3):
        super().__init__()
        self.linear1 = nn.Linear(n_obs, hidden_size)
        self.linear2 = nn.Linear(hidden_size, hidden_size)
        self.linear3 = nn.Linear(hidden_size, n_actions)

        # Initialise the last layer to small values in [-init_w, init_w].
        for param in (self.linear3.weight, self.linear3.bias):
            param.data.uniform_(-init_w, init_w)

    def forward(self, x):
        """Run two ReLU layers and return tanh of the last layer's output."""
        hidden = F.relu(self.linear1(x))
        hidden = F.relu(self.linear2(hidden))
        return torch.tanh(self.linear3(hidden))
|
||||
|
||||
class ActorCritic(nn.Module):
    """Separate critic and actor heads that share the same input."""

    def __init__(self, n_states, n_actions, hidden_dim=256):
        super().__init__()
        critic_layers = [
            nn.Linear(n_states, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),
        ]
        self.critic = nn.Sequential(*critic_layers)

        actor_layers = [
            nn.Linear(n_states, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, n_actions),
            nn.Softmax(dim=1),  # probabilities over dim 1
        ]
        self.actor = nn.Sequential(*actor_layers)

    def forward(self, x):
        """Return ``(dist, value)``: a Categorical over the actor output and the critic output."""
        value = self.critic(x)
        action_probs = self.actor(x)
        return Categorical(action_probs), value
|
||||
153
projects/codes/common/multiprocessing_env.py
Normal file
153
projects/codes/common/multiprocessing_env.py
Normal file
@@ -0,0 +1,153 @@
|
||||
# This code is adapted from OpenAI Baselines; it runs environments in parallel subprocesses
|
||||
# https://github.com/openai/baselines/tree/master/baselines/common/vec_env
|
||||
|
||||
import numpy as np
|
||||
from multiprocessing import Process, Pipe
|
||||
|
||||
def worker(remote, parent_remote, env_fn_wrapper):
    """Serve environment commands received over ``remote`` until 'close'.

    ``parent_remote`` is closed first, then the environment is built by
    calling ``env_fn_wrapper.x()``. Each message is a ``(cmd, data)`` pair;
    an unknown command raises NotImplementedError.
    """
    parent_remote.close()
    env = env_fn_wrapper.x()
    while True:
        cmd, data = remote.recv()
        if cmd == 'close':
            remote.close()
            return
        if cmd == 'step':
            ob, reward, done, info = env.step(data)
            if done:
                # Episode finished: send the first observation of the next one.
                ob = env.reset()
            reply = (ob, reward, done, info)
        elif cmd == 'reset':
            reply = env.reset()
        elif cmd == 'reset_task':
            reply = env.reset_task()
        elif cmd == 'get_spaces':
            reply = (env.observation_space, env.action_space)
        else:
            raise NotImplementedError
        remote.send(reply)
|
||||
|
||||
class VecEnv(object):
    """An abstract asynchronous, vectorized environment."""

    def __init__(self, num_envs, observation_space, action_space):
        self.num_envs = num_envs
        self.observation_space = observation_space
        self.action_space = action_space

    def reset(self):
        """Reset all the environments.

        Should return an array of observations, or a tuple of observation
        arrays. If step_async is still doing work, that work will be
        cancelled and step_wait() should not be called until step_async()
        is invoked again. This base implementation does nothing.
        """
        return None

    def step_async(self, actions):
        """Tell all the environments to start taking a step with ``actions``.

        Call step_wait() to get the results of the step. You should not
        call this if a step_async run is already pending. This base
        implementation does nothing.
        """
        return None

    def step_wait(self):
        """Wait for the step taken with step_async().

        Should return ``(obs, rews, dones, infos)``:
         - obs: an array of observations, or a tuple of arrays of observations
         - rews: an array of rewards
         - dones: an array of "episode done" booleans
         - infos: a sequence of info objects
        This base implementation does nothing.
        """
        return None

    def close(self):
        """Clean up the environments' resources (no-op in this base class)."""
        return None

    def step(self, actions):
        """Run step_async followed by step_wait and return the latter's result."""
        self.step_async(actions)
        result = self.step_wait()
        return result
|
||||
|
||||
|
||||
class CloudpickleWrapper(object):
    """Serialize the wrapped object with cloudpickle.

    Without this, multiprocessing would try to use plain pickle on it.
    """

    def __init__(self, x):
        self.x = x

    def __getstate__(self):
        """Return the cloudpickle byte string of the wrapped object."""
        import cloudpickle
        payload = cloudpickle.dumps(self.x)
        return payload

    def __setstate__(self, ob):
        """Restore the wrapped object from the bytes made by ``__getstate__``."""
        import pickle
        restored = pickle.loads(ob)
        self.x = restored
|
||||
|
||||
|
||||
class SubprocVecEnv(VecEnv):
    """Vectorized environment that runs each env in its own subprocess."""

    def __init__(self, env_fns, spaces=None):
        """Start one ``worker`` process per entry of ``env_fns``.

        env_fns: list of callables, each creating one gym environment
        spaces: accepted but not used by this implementation
        """
        self.waiting = False
        self.closed = False
        self.nenvs = len(env_fns)

        pipe_pairs = [Pipe() for _ in range(self.nenvs)]
        self.remotes, self.work_remotes = zip(*pipe_pairs)

        self.ps = []
        for child_end, parent_end, make_env in zip(self.work_remotes, self.remotes, env_fns):
            proc = Process(
                target=worker,
                args=(child_end, parent_end, CloudpickleWrapper(make_env)),
            )
            # If the main process crashes, we should not cause things to hang.
            proc.daemon = True
            self.ps.append(proc)
        for proc in self.ps:
            proc.start()
        # This process does not use the worker ends of the pipes.
        for child_end in self.work_remotes:
            child_end.close()

        # Ask the first worker for the spaces.
        first_remote = self.remotes[0]
        first_remote.send(('get_spaces', None))
        observation_space, action_space = first_remote.recv()
        VecEnv.__init__(self, len(env_fns), observation_space, action_space)

    def step_async(self, actions):
        """Send one 'step' command with its action to every worker."""
        for remote, action in zip(self.remotes, actions):
            remote.send(('step', action))
        self.waiting = True

    def step_wait(self):
        """Collect the step results and stack observations, rewards and dones."""
        replies = [remote.recv() for remote in self.remotes]
        self.waiting = False
        obs, rews, dones, infos = zip(*replies)
        return np.stack(obs), np.stack(rews), np.stack(dones), infos

    def reset(self):
        """Reset every env and return the stacked observations."""
        return self._broadcast('reset')

    def reset_task(self):
        """Send 'reset_task' to every worker and return the stacked replies."""
        return self._broadcast('reset_task')

    def _broadcast(self, command):
        """Send ``command`` to all workers first, then stack all their replies."""
        for remote in self.remotes:
            remote.send((command, None))
        return np.stack([remote.recv() for remote in self.remotes])

    def close(self):
        """Shut the workers down; a second call does nothing."""
        if self.closed:
            return
        if self.waiting:
            # Drain the replies of a pending step before closing.
            for remote in self.remotes:
                remote.recv()
        for remote in self.remotes:
            remote.send(('close', None))
        for proc in self.ps:
            proc.join()
        self.closed = True

    def __len__(self):
        """Number of environments."""
        return self.nenvs
|
||||
126
projects/codes/common/utils.py
Normal file
126
projects/codes/common/utils.py
Normal file
@@ -0,0 +1,126 @@
|
||||
#!/usr/bin/env python
|
||||
# coding=utf-8
|
||||
'''
|
||||
Author: John
|
||||
Email: johnjim0816@gmail.com
|
||||
Date: 2021-03-12 16:02:24
|
||||
LastEditor: John
|
||||
LastEditTime: 2022-07-31 23:18:04
|
||||
Description:
|
||||
Environment:
|
||||
'''
|
||||
import os
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
import json
|
||||
|
||||
from matplotlib.font_manager import FontProperties # 导入字体模块
|
||||
|
||||
def chinese_font(fname='/System/Library/Fonts/STHeiti Light.ttc', size=15):
    """Return font properties for Chinese text, or None when unavailable.

    The default ``fname`` is a macOS system font path; change it to match
    your own machine, otherwise the default font is used.

    Args:
        fname: path of the font file to load.
        size: font size.

    Returns:
        A ``FontProperties`` for the font file, or None if the file does not
        exist or the properties cannot be created.
    """
    # Check the file up front: building FontProperties from a missing path
    # is not what fails, the error only shows up later when drawing.
    if not os.path.isfile(fname):
        return None
    try:
        return FontProperties(fname=fname, size=size)
    except (OSError, ValueError):
        return None
|
||||
|
||||
def plot_rewards_cn(rewards, ma_rewards, cfg, tag='train'):
    """Plot the reward curves with Chinese titles and labels.

    Args:
        rewards: rewards to plot.
        ma_rewards: moving-average rewards to plot.
        cfg: config object providing ``env_name``, ``algo_name``,
            ``result_path`` and a save flag (``save_fig``, or ``save`` when
            ``save_fig`` is absent).
        tag: prefix of the saved file name.
    """
    sns.set()
    plt.figure()
    font = chinese_font()  # resolved once and reused for every text element
    plt.title(u"{}环境下{}算法的学习曲线".format(cfg.env_name,
              cfg.algo_name), fontproperties=font)
    plt.xlabel(u'回合数', fontproperties=font)
    plt.plot(rewards)
    plt.plot(ma_rewards)
    plt.legend((u'奖励', u'滑动平均奖励',), loc="best", prop=font)
    # plot_rewards reads cfg.save_fig; prefer it here as well and keep
    # cfg.save as a fallback for configs that only define the old name.
    should_save = cfg.save_fig if hasattr(cfg, 'save_fig') else cfg.save
    if should_save:
        plt.savefig(cfg.result_path+f"{tag}_rewards_curve_cn")
    # plt.show()
|
||||
|
||||
|
||||
def plot_rewards(rewards, ma_rewards, cfg, tag='train'):
    """Plot rewards and moving-average rewards, optionally saving the figure.

    Args:
        rewards: rewards to plot.
        ma_rewards: moving-average rewards to plot.
        cfg: config object providing ``device``, ``algo_name``, ``env_name``,
            ``save_fig`` and ``result_path``.
        tag: prefix of the saved file name.
    """
    sns.set()
    plt.figure()  # new figure instance so several plots can be drawn
    plt.title("learning curve on {} of {} for {}".format(
        cfg.device, cfg.algo_name, cfg.env_name))
    plt.xlabel('episodes')  # was misspelled 'epsiodes'
    plt.plot(rewards, label='rewards')
    plt.plot(ma_rewards, label='ma rewards')
    plt.legend()
    if cfg.save_fig:
        plt.savefig(cfg.result_path+"{}_rewards_curve".format(tag))
    plt.show()
|
||||
|
||||
|
||||
def plot_losses(losses, algo="DQN", save=True, path='./'):
    """Plot a loss curve, optionally saving it as ``path + "losses_curve"``.

    Args:
        losses: losses to plot.
        algo: algorithm name shown in the title.
        save: whether to save the figure.
        path: prefix the file name is appended to.
    """
    sns.set()
    plt.figure()
    plt.title("loss curve of {}".format(algo))
    plt.xlabel('episodes')  # was misspelled 'epsiodes'
    # The curve shows losses; it used to be labelled 'rewards' in the legend.
    plt.plot(losses, label='losses')
    plt.legend()
    if save:
        plt.savefig(path+"losses_curve")
    plt.show()
|
||||
|
||||
def save_results(dic, tag='train', path='./results'):
    """Save every value of ``dic`` as ``<path>/<tag>_<key>.npy``.

    Args:
        dic: mapping from result name to the data to store.
        tag: prefix of each file name.
        path: output directory, with or without a trailing separator; it is
            created if it does not exist.
    """
    # Join path and file name properly: plain string concatenation put the
    # files next to the directory when ``path`` had no trailing separator
    # (as with the default './results').
    out_dir = Path(path)
    out_dir.mkdir(parents=True, exist_ok=True)
    for key, value in dic.items():
        np.save(str(out_dir / '{}_{}.npy'.format(tag, key)), value)
    print('Results saved!')
|
||||
|
||||
# def save_results(rewards, ma_rewards, tag='train', path='./results'):
|
||||
# ''' 保存奖励
|
||||
# '''
|
||||
# np.save(path+'{}_rewards.npy'.format(tag), rewards)
|
||||
# np.save(path+'{}_ma_rewards.npy'.format(tag), ma_rewards)
|
||||
# print('Result saved!')
|
||||
|
||||
|
||||
def make_dir(*paths):
    """Create every given directory, including missing parents.

    Directories that already exist are left as they are.
    """
    for target in map(Path, paths):
        target.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
def del_empty_dir(*paths):
    """Delete all empty folders directly inside each of the given directories.

    Entries that are not directories (regular files) are skipped; they used
    to make the function fail when it tried to list them.

    Args:
        *paths: directories whose direct children are inspected.
    """
    for path in paths:
        for entry in os.listdir(path):
            candidate = os.path.join(path, entry)
            if not os.path.isdir(candidate):
                continue
            if not os.listdir(candidate):
                # NOTE: os.removedirs also prunes parent directories that
                # become empty as a result, as the original code did.
                os.removedirs(candidate)
|
||||
|
||||
def save_args(args):
    """Dump the attributes of ``args`` as JSON to ``args.result_path + 'params.json'``."""
    params_file = args.result_path+'params.json'
    params = vars(args)
    with open(params_file, 'w') as handle:
        json.dump(params, handle)
    print("Parameters saved!")
|
||||
def smooth(data, weight=0.9):
    """Smooth a sequence with an exponential moving average.

    Args:
        data (List): input data.
        weight (Float): smoothing weight between 0 and 1; the higher the
            value, the smoother the result. 0.9 is a common choice.

    Returns:
        smoothed (List): the smoothed data; an empty list for empty input.
    """
    # Guard against empty input, which used to fail on data[0].
    if len(data) == 0:
        return []
    last = data[0]  # first value in the plot (first timestep)
    smoothed = []
    for point in data:
        smoothed_val = last * weight + (1 - weight) * point  # smoothed value
        smoothed.append(smoothed_val)
        last = smoothed_val
    return smoothed
|
||||
29
projects/codes/common/wrappers.py
Normal file
29
projects/codes/common/wrappers.py
Normal file
@@ -0,0 +1,29 @@
|
||||
import gym
|
||||
|
||||
class TimeLimit(gym.Wrapper):
    """End an episode after a maximum number of steps.

    With ``max_episode_steps=None`` (the default) no limit is applied.
    """

    def __init__(self, env, max_episode_steps=None):
        super(TimeLimit, self).__init__(env)
        self._max_episode_steps = max_episode_steps
        self._elapsed_steps = 0

    def step(self, ac):
        """Step the env; once the limit is reached set ``done`` and flag the truncation in ``info``."""
        observation, reward, done, info = self.env.step(ac)
        self._elapsed_steps += 1
        # The None check keeps the default constructor argument usable:
        # comparing an int with None raises TypeError.
        limit = self._max_episode_steps
        if limit is not None and self._elapsed_steps >= limit:
            done = True
            info['TimeLimit.truncated'] = True
        return observation, reward, done, info

    def reset(self, **kwargs):
        """Restart the step counter and reset the wrapped environment."""
        self._elapsed_steps = 0
        return self.env.reset(**kwargs)
|
||||
|
||||
class ClipActionsWrapper(gym.Wrapper):
    """Sanitize actions before they are passed to the wrapped environment."""

    def step(self, action):
        """Replace NaNs via ``np.nan_to_num``, clip to the action space bounds, then step."""
        import numpy as np
        space = self.action_space
        cleaned = np.nan_to_num(action)
        bounded = np.clip(cleaned, space.low, space.high)
        return self.env.step(bounded)

    def reset(self, **kwargs):
        """Forward the reset to the wrapped environment."""
        return self.env.reset(**kwargs)
|
||||
Reference in New Issue
Block a user