diff --git a/codes/Q-learning/README.md b/codes/Q-learning/README.md
deleted file mode 100644
index be11391..0000000
--- a/codes/Q-learning/README.md
+++ /dev/null
@@ -1,38 +0,0 @@
-## Introduction to the CliffWalking-v0 environment
-
-In the cliff-walking problem (CliffWalking), the agent starts at the bottom-left corner of a 4 x 12 grid and must reach the goal at the bottom-right corner. At each step it moves one cell up, down, left, or right, and every step yields a reward of -1.
-
-![](assets/cliffwalking_1.png)
-
-As shown in the figure, the red cells are the cliff, and the numbers are the position indices the agent observes (the observation), which take 48 distinct values from 0 to 47. The agent's movement is subject to the following rules:
-
-* The agent cannot move off the grid. If an action would take it off the grid, the agent stays in place, but the step still yields a reward of -1.
-
-* If the agent "falls off the cliff", it is immediately sent back to the start position and receives a reward of -100.
-
-* When the agent reaches the goal, the episode ends, and the episode return is the sum of the per-step rewards.
-
-The actual rendered simulation looks like this:
-
-![](assets/cliffwalking_2.png)
-
-Since the shortest path from start to goal takes 13 steps and each step yields a reward of -1, an optimal policy should obtain a total reward of -13 per episode.
-
-
-## Usage
-
-train:
-
-```bash
-python main.py
-```
-
-eval:
-
-```bash
-python main.py --train 0
-```
-tensorboard:
-```bash
-tensorboard --logdir logs
-```
\ No newline at end of file
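As a quick sanity check on the -13 figure in the deleted README, the sketch below rolls the known shortest path (one step up, eleven steps right, one step down) through CliffWalking-v0. It is an illustrative snippet, not part of the repo, and it assumes the same classic gym API and action encoding (0 up, 1 right, 2 down, 3 left) noted in the deleted env.py.

```python
import gym

# Shortest path: one step up, eleven steps right, one step down = 13 steps.
env = gym.make("CliffWalking-v0")
obs = env.reset()
total_reward = 0
for action in [0] + [1] * 11 + [2]:
    obs, reward, done, _ = env.step(action)
    total_reward += reward
print(total_reward, done)  # expected: -13 True
```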
diff --git a/codes/Q-learning/agent.py b/codes/Q-learning/agent.py
deleted file mode 100644
index c18970e..0000000
--- a/codes/Q-learning/agent.py
+++ /dev/null
@@ -1,91 +0,0 @@
-#!/usr/bin/env python
-# coding=utf-8
-'''
-Author: John
-Email: johnjim0816@gmail.com
-Date: 2020-09-11 23:03:00
-LastEditor: John
-LastEditTime: 2020-12-12 10:13:47
-Description:
-Environment:
-'''
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-import numpy as np
-import math
-
-class QLearning(object):
-    def __init__(self,
-                 obs_dim,
-                 action_dim,
-                 learning_rate=0.01,
-                 gamma=0.9,
-                 epsilon_start=0.9, epsilon_end=0.1, epsilon_decay=200):
-        self.action_dim = action_dim  # action dimension, i.e. the number of available actions
-        self.lr = learning_rate  # learning rate
-        self.gamma = gamma  # discount factor for rewards
-        self.epsilon = 0  # probability of acting randomly (e-greedy policy); epsilon decays over time
-        self.sample_count = 0  # epsilon decays with the number of samples taken during training, so keep a counter
-        self.epsilon_start = epsilon_start
-        self.epsilon_end = epsilon_end
-        self.epsilon_decay = epsilon_decay
-        self.Q_table = np.zeros((obs_dim, action_dim))  # Q-table
-
-    def sample(self, obs):
-        '''Sample an action for the given observation, with exploration; used during training
-        '''
-        self.sample_count += 1
-        self.epsilon = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * \
-            math.exp(-1. * self.sample_count / self.epsilon_decay)
-        if np.random.uniform(0, 1) > self.epsilon:  # draw a number in [0, 1); act greedily if it exceeds epsilon, otherwise act randomly
-            action = self.predict(obs)
-        else:
-            action = np.random.choice(self.action_dim)  # with some probability, explore by picking a random action
-        return action
-    def predict(self, obs):
-        '''Choose an action for the given observation, without exploration; used when evaluating the model
-        '''
-        Q_list = self.Q_table[obs, :]
-        Q_max = np.max(Q_list)
-        action_list = np.where(Q_list == Q_max)[0]
-        action = np.random.choice(action_list)  # Q_max may be attained by several actions, so pick one of them at random
-        return action
-
-    def learn(self, obs, action, reward, next_obs, done):
-        '''Learning method (off-policy), i.e. how the Q-table is updated
-        Args:
-            obs [type]: observation before the interaction, s_t
-            action [type]: action chosen in this interaction, a_t
-            reward [type]: reward r received for this action
-            next_obs [type]: observation after the interaction, s_t+1
-            done function: whether the episode has ended
-        '''
-        Q_predict = self.Q_table[obs, action]
-        if done:
-            Q_target = reward  # there is no next state
-        else:
-            Q_target = reward + self.gamma * np.max(
-                self.Q_table[next_obs, :])  # Q-learning target
-        self.Q_table[obs, action] += self.lr * (Q_target - Q_predict)  # update Q
-
-    def save_model(self, path):
-        '''Save the Q-table to a file
-        '''
-        np.save(path, self.Q_table)
-    def load_model(self, path):
-        '''Load the Q-table from a file
-        '''
-        self.Q_table = np.load(path)
diff --git a/codes/Q-learning/assets/cliffwalking_1.png b/codes/Q-learning/assets/cliffwalking_1.png
deleted file mode 100644
index ae5b0f8..0000000
Binary files a/codes/Q-learning/assets/cliffwalking_1.png and /dev/null differ
diff --git a/codes/Q-learning/assets/cliffwalking_2.png b/codes/Q-learning/assets/cliffwalking_2.png
deleted file mode 100644
index 0bbb5b2..0000000
Binary files a/codes/Q-learning/assets/cliffwalking_2.png and /dev/null differ
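To make the update in `learn` above concrete, here is a small worked example of the same temporal-difference update on a toy Q-table; the state/action numbers and the initial values are invented for illustration and are not from the original code.

```python
import numpy as np

lr, gamma = 0.1, 0.9
Q_table = np.zeros((48, 4))
Q_table[25, :] = -2.0  # pretend the next state already has value estimates

obs, action, reward, next_obs, done = 24, 1, -1, 25, False
Q_predict = Q_table[obs, action]  # 0.0
Q_target = reward if done else reward + gamma * np.max(Q_table[next_obs, :])  # -1 + 0.9 * (-2.0) = -2.8
Q_table[obs, action] += lr * (Q_target - Q_predict)  # 0.0 + 0.1 * (-2.8 - 0.0) = -0.28
print(Q_table[obs, action])
```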
diff --git a/codes/Q-learning/env.py b/codes/Q-learning/env.py
deleted file mode 100644
index 373e3f4..0000000
--- a/codes/Q-learning/env.py
+++ /dev/null
@@ -1,199 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# -*- coding: utf-8 -*-
-
-import gym
-import turtle
-import numpy as np
-
-def env_init_1():
-    ''' Initialize the CliffWalking-v0 environment
-    '''
-    env = gym.make("CliffWalking-v0")  # 0 up, 1 right, 2 down, 3 left
-    env = CliffWalkingWapper(env)
-    return env
-
-def env_init_2(gridmap=None, is_slippery=False):
-    if gridmap is None:
-        gridmap = ['SFFF', 'FHFH', 'FFFH', 'HFFG']
-    env = gym.make("FrozenLake-v0", desc=gridmap, is_slippery=is_slippery)
-    env = FrozenLakeWapper(env)
-    return env
-
-
-class FrozenLakeWapper(gym.Wrapper):
-    def __init__(self, env):
-        gym.Wrapper.__init__(self, env)
-        self.max_y = env.desc.shape[0]
-        self.max_x = env.desc.shape[1]
-        self.t = None
-        self.unit = 50
-
-    def draw_box(self, x, y, fillcolor='', line_color='gray'):
-        self.t.up()
-        self.t.goto(x * self.unit, y * self.unit)
-        self.t.color(line_color)
-        self.t.fillcolor(fillcolor)
-        self.t.setheading(90)
-        self.t.down()
-        self.t.begin_fill()
-        for _ in range(4):
-            self.t.forward(self.unit)
-            self.t.right(90)
-        self.t.end_fill()
-
-    def move_player(self, x, y):
-        self.t.up()
-        self.t.setheading(90)
-        self.t.fillcolor('red')
-        self.t.goto((x + 0.5) * self.unit, (y + 0.5) * self.unit)
-
-    def render(self):
-        if self.t == None:
-            self.t = turtle.Turtle()
-            self.wn = turtle.Screen()
-            self.wn.setup(self.unit * self.max_x + 100,
-                          self.unit * self.max_y + 100)
-            self.wn.setworldcoordinates(0, 0, self.unit * self.max_x,
-                                        self.unit * self.max_y)
-            self.t.shape('circle')
-            self.t.width(2)
-            self.t.speed(0)
-            self.t.color('gray')
-            for i in range(self.desc.shape[0]):
-                for j in range(self.desc.shape[1]):
-                    x = j
-                    y = self.max_y - 1 - i
-                    if self.desc[i][j] == b'S':  # Start
-                        self.draw_box(x, y, 'white')
-                    elif self.desc[i][j] == b'F':  # Frozen ice
-                        self.draw_box(x, y, 'white')
-                    elif self.desc[i][j] == b'G':  # Goal
-                        self.draw_box(x, y, 'yellow')
-                    elif self.desc[i][j] == b'H':  # Hole
-                        self.draw_box(x, y, 'black')
-                    else:
-                        self.draw_box(x, y, 'white')
-            self.t.shape('turtle')
-
-        x_pos = self.s % self.max_x
-        y_pos = self.max_y - 1 - int(self.s / self.max_x)
-        self.move_player(x_pos, y_pos)
-
-
-class CliffWalkingWapper(gym.Wrapper):
-    def __init__(self, env):
-        gym.Wrapper.__init__(self, env)
-        self.t = None
-        self.unit = 50
-        self.max_x = 12
-        self.max_y = 4
-
-    def draw_x_line(self, y, x0, x1, color='gray'):
-        assert x1 > x0
-        self.t.color(color)
-        self.t.setheading(0)
-        self.t.up()
-        self.t.goto(x0, y)
-        self.t.down()
-        self.t.forward(x1 - x0)
-
-    def draw_y_line(self, x, y0, y1, color='gray'):
-        assert y1 > y0
-        self.t.color(color)
-        self.t.setheading(90)
-        self.t.up()
-        self.t.goto(x, y0)
-        self.t.down()
-        self.t.forward(y1 - y0)
-
-    def draw_box(self, x, y, fillcolor='', line_color='gray'):
-        self.t.up()
-        self.t.goto(x * self.unit, y * self.unit)
-        self.t.color(line_color)
-        self.t.fillcolor(fillcolor)
-        self.t.setheading(90)
-        self.t.down()
-        self.t.begin_fill()
-        for i in range(4):
-            self.t.forward(self.unit)
-            self.t.right(90)
-        self.t.end_fill()
-
-    def move_player(self, x, y):
-        self.t.up()
-        self.t.setheading(90)
-        self.t.fillcolor('red')
-        self.t.goto((x + 0.5) * self.unit, (y + 0.5) * self.unit)
-
-    def render(self):
-        if self.t == None:
-            self.t = turtle.Turtle()
-            self.wn = turtle.Screen()
-            self.wn.setup(self.unit * self.max_x + 100,
-                          self.unit * self.max_y + 100)
-            self.wn.setworldcoordinates(0, 0, self.unit * self.max_x,
-                                        self.unit * self.max_y)
-            self.t.shape('circle')
-            self.t.width(2)
-            self.t.speed(0)
-            self.t.color('gray')
-            for _ in range(2):
-                self.t.forward(self.max_x * self.unit)
-                self.t.left(90)
-                self.t.forward(self.max_y * self.unit)
-                self.t.left(90)
-            for i in range(1, self.max_y):
-                self.draw_x_line(
-                    y=i * self.unit, x0=0, x1=self.max_x * self.unit)
-            for i in range(1, self.max_x):
-                self.draw_y_line(
-                    x=i * self.unit, y0=0, y1=self.max_y * self.unit)
-
-            for i in range(1, self.max_x - 1):
-                self.draw_box(i, 0, 'black')
-            self.draw_box(self.max_x - 1, 0, 'yellow')
-            self.t.shape('turtle')
-
-        x_pos = self.s % self.max_x
-        y_pos = self.max_y - 1 - int(self.s / self.max_x)
-        self.move_player(x_pos, y_pos)
-
-
-if __name__ == '__main__':
-    # Environment 1: FrozenLake; whether the ice is slippery is configurable
-    # 0 left, 1 down, 2 right, 3 up
-    env = gym.make("FrozenLake-v0", is_slippery=False)
-    env = FrozenLakeWapper(env)
-
-    # Environment 2: CliffWalking, the cliff environment
-    # env = gym.make("CliffWalking-v0")  # 0 up, 1 right, 2 down, 3 left
-    # env = CliffWalkingWapper(env)
-
-    # Environment 3: a custom grid world with a configurable map; S = Start, F = Floor, H = Hole, G = Goal
-    # gridmap = [
-    #     'SFFF',
-    #     'FHFF',
-    #     'FFFF',
-    #     'HFGF']
-    # env = GridWorld(gridmap)
-
-    env.reset()
-    for step in range(10):
-        action = np.random.randint(0, 4)
-        obs, reward, done, info = env.step(action)
-        print('step {}: action {}, obs {}, reward {}, done {}, info {}'.format(\
-            step, action, obs, reward, done, info))
-        # env.render()  # render one frame
\ No newline at end of file
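The `render` methods above map the flat state index back to grid coordinates with `x = s % max_x` and `y = max_y - 1 - s // max_x`. A standalone check of that mapping for the 4 x 12 cliff grid (illustrative only, not part of the original file):

```python
max_x, max_y = 12, 4  # CliffWalking grid: 12 columns, 4 rows

def to_xy(s):
    # Same mapping as CliffWalkingWapper.render: x counts columns from the left,
    # y counts rows from the bottom (the turtle world coordinates used for drawing).
    return s % max_x, max_y - 1 - s // max_x

print(to_xy(36))  # (0, 0)  -> start, bottom-left corner
print(to_xy(47))  # (11, 0) -> goal, bottom-right corner
print(to_xy(0))   # (0, 3)  -> top-left cell
```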
diff --git a/codes/Q-learning/main.py b/codes/Q-learning/main.py
deleted file mode 100644
index 75618d7..0000000
--- a/codes/Q-learning/main.py
+++ /dev/null
@@ -1,146 +0,0 @@
-#!/usr/bin/env python
-# coding=utf-8
-'''
-Author: John
-Email: johnjim0816@gmail.com
-Date: 2020-09-11 23:03:00
-LastEditor: John
-LastEditTime: 2021-01-05 09:41:34
-Description:
-Environment:
-'''
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# -*- coding: utf-8 -*-
-
-import gym
-from env import CliffWalkingWapper, FrozenLakeWapper
-from agent import QLearning
-import os
-import numpy as np
-import argparse
-import time
-import matplotlib.pyplot as plt
-from env import env_init_1
-from params import get_args
-from params import SEQUENCE, SAVED_MODEL_PATH, RESULT_PATH
-from utils import save_results, save_model
-from plot import plot
-
-def train(cfg):
-    '''# env = gym.make("FrozenLake-v0", is_slippery=False)  # 0 left, 1 down, 2 right, 3 up
-    # env = FrozenLakeWapper(env)'''
-    env = env_init_1()
-    agent = QLearning(
-        obs_dim=env.observation_space.n,
-        action_dim=env.action_space.n,
-        learning_rate=cfg.policy_lr,
-        gamma=cfg.gamma,
-        epsilon_start=cfg.epsilon_start, epsilon_end=cfg.epsilon_end, epsilon_decay=cfg.epsilon_decay)
-    render = False  # whether to show the GUI
-    rewards = []  # rewards of every episode
-    MA_rewards = []  # moving-average rewards
-    steps = []  # number of steps of every episode
-    for i_episode in range(1, cfg.max_episodes + 1):
-        ep_reward = 0  # total reward of the current episode
-        ep_steps = 0  # number of steps taken in the current episode
-        obs = env.reset()  # reset the environment, i.e. start a new episode
-        while True:
-            action = agent.sample(obs)  # choose an action according to the algorithm
-            next_obs, reward, done, _ = env.step(action)  # one interaction with the environment
-            # train the Q-learning algorithm
-            agent.learn(obs, action, reward, next_obs, done)  # the next action is not needed
-
-            obs = next_obs  # move on to the next observation
-            ep_reward += reward
-            ep_steps += 1  # count steps
-            if render:
-                env.render()  # render a new frame
-            if done:
-                break
-        steps.append(ep_steps)
-        rewards.append(ep_reward)
-        '''compute the moving-average reward'''
-        if i_episode == 1:
-            MA_rewards.append(ep_reward)
-        else:
-            MA_rewards.append(
-                0.9 * MA_rewards[-1] + 0.1 * ep_reward)
-        print('Episode %s: steps = %s , reward = %.1f, explore = %.2f' % (i_episode, ep_steps,
-                                                                          ep_reward, agent.epsilon))
-        '''render every 20 episodes to check the progress'''
-        if i_episode % 20 == 0:
-            render = True
-        else:
-            render = False
-    print('Complete training!')
-    save_model(agent, model_path=SAVED_MODEL_PATH)
-    '''save the rewards and related results'''
-    save_results(rewards, MA_rewards, tag='train', result_path=RESULT_PATH)
-    plot(rewards)
-    plot(MA_rewards, ylabel='moving_average_rewards_train')
-
-def eval(cfg, saved_model_path=SAVED_MODEL_PATH):
-
-    env = gym.make("CliffWalking-v0")  # 0 up, 1 right, 2 down, 3 left
-    env = CliffWalkingWapper(env)
-    agent = QLearning(
-        obs_dim=env.observation_space.n,
-        action_dim=env.action_space.n,
-        learning_rate=cfg.policy_lr,
-        gamma=cfg.gamma,
-        epsilon_start=cfg.epsilon_start, epsilon_end=cfg.epsilon_end, epsilon_decay=cfg.epsilon_decay)
-    agent.load_model(saved_model_path + 'checkpoint.npy')  # load the saved model
-    rewards = []  # rewards of every episode
-    MA_rewards = []  # moving-average rewards
-    steps = []  # number of steps of every episode
-    for i_episode in range(1, 10 + 1):
-        ep_reward = 0  # total reward of the current episode
-        ep_steps = 0  # number of steps taken in the current episode
-        obs = env.reset()  # reset the environment, i.e. start a new episode
-        while True:
-            action = agent.predict(obs)  # choose an action according to the algorithm
-            next_obs, reward, done, _ = env.step(action)  # one interaction with the environment
-            obs = next_obs  # move on to the next observation
-            time.sleep(0.5)
-            env.render()
-            ep_reward += reward
-            ep_steps += 1  # count steps
-            if done:
-                break
-        steps.append(ep_steps)
-        rewards.append(ep_reward)
-        # compute the moving-average reward
-        if i_episode == 1:
-            MA_rewards.append(ep_reward)
-        else:
-            MA_rewards.append(
-                0.9 * MA_rewards[-1] + 0.1 * ep_reward)
-        print('Episode %s: steps = %s , reward = %.1f' % (i_episode, ep_steps, ep_reward))
-    print('Complete evaluation!')
-    save_model(agent, model_path=SAVED_MODEL_PATH)
-    '''save the rewards and related results'''
-    save_results(rewards, MA_rewards, tag='eval', result_path=RESULT_PATH)
-    plot(rewards)
-    plot(MA_rewards, ylabel='moving_average_rewards_eval')
-
-if __name__ == "__main__":
-    cfg = get_args()
-    if cfg.train:
-        train(cfg)
-        eval(cfg)
-    else:
-        model_path = os.path.split(os.path.abspath(__file__))[0] + "/saved_model/"
-        eval(cfg, saved_model_path=model_path)
\ No newline at end of file
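The `explore` value printed each episode by `train` is the epsilon computed in `QLearning.sample`. Below is a minimal sketch of that decay schedule using the defaults from params.py (epsilon_start=0.9, epsilon_end=0.1, epsilon_decay=200); the sample counts are arbitrary illustration points, not from the original code.

```python
import math

epsilon_start, epsilon_end, epsilon_decay = 0.9, 0.1, 200

def epsilon_at(sample_count):
    # Same formula as QLearning.sample: exponential decay from epsilon_start toward epsilon_end.
    return epsilon_end + (epsilon_start - epsilon_end) * math.exp(-1. * sample_count / epsilon_decay)

for n in (0, 100, 200, 500, 1000):
    print(n, round(epsilon_at(n), 3))
# roughly: 0 -> 0.9, 200 -> 0.394, 1000 -> 0.105
```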
diff --git a/codes/Q-learning/params.py b/codes/Q-learning/params.py
deleted file mode 100644
index 89cbee7..0000000
--- a/codes/Q-learning/params.py
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/usr/bin/env python
-# coding=utf-8
-'''
-Author: John
-Email: johnjim0816@gmail.com
-Date: 2020-11-24 19:45:58
-LastEditor: John
-LastEditTime: 2020-11-24 19:53:13
-Description:
-Environment:
-'''
-import argparse
-import datetime
-import os
-
-SEQUENCE = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
-SAVED_MODEL_PATH = os.path.split(os.path.abspath(__file__))[0] + "/saved_model/" + SEQUENCE + '/'
-RESULT_PATH = os.path.split(os.path.abspath(__file__))[0] + "/result/" + SEQUENCE + '/'
-
-def get_args():
-    '''Hyperparameters for training
-    '''
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--train", default=1, type=int)  # 1 means train, 0 means eval only
-    parser.add_argument("--gamma", default=0.9,
-                        type=float, help="discount factor for rewards")
-    parser.add_argument("--epsilon_start", default=0.9,
-                        type=float, help="initial epsilon of the e-greedy policy")
-    parser.add_argument("--epsilon_end", default=0.1, type=float, help="final epsilon of the e-greedy policy")
-    parser.add_argument("--epsilon_decay", default=200, type=float, help="decay rate of epsilon in the e-greedy policy")
-    parser.add_argument("--policy_lr", default=0.1, type=float, help="learning rate")
-    parser.add_argument("--max_episodes", default=500, type=int, help="maximum number of training episodes")
-
-    config = parser.parse_args()
-
-    return config
\ No newline at end of file
diff --git a/codes/Q-learning/plot.py b/codes/Q-learning/plot.py
deleted file mode 100644
index e64ceba..0000000
--- a/codes/Q-learning/plot.py
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/usr/bin/env python
-# coding=utf-8
-'''
-Author: John
-Email: johnjim0816@gmail.com
-Date: 2020-10-07 20:57:11
-LastEditor: John
-LastEditTime: 2020-10-07 21:00:29
-Description:
-Environment:
-'''
-import matplotlib.pyplot as plt
-import seaborn as sns
-import numpy as np
-import os
-
-def plot(item, ylabel='rewards'):
-    sns.set()
-    plt.figure()
-    plt.plot(np.arange(len(item)), item)
-    plt.title(ylabel + ' of Q-learning')
-    plt.ylabel(ylabel)
-    plt.xlabel('episodes')
-    plt.savefig(os.path.dirname(__file__) + "/result/" + ylabel + ".png")
-    plt.show()
-
-if __name__ == "__main__":
-
-    output_path = os.path.dirname(__file__) + "/result/"
-    rewards = np.load(output_path + "rewards_train.npy")
-    MA_rewards = np.load(output_path + "MA_rewards_train.npy")
-    steps = np.load(output_path + "steps_train.npy")
-    plot(rewards)
-    plot(MA_rewards, ylabel='moving_average_rewards')
-    plot(steps, ylabel='steps')
\ No newline at end of file
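Both `train` and `eval` smooth the episode rewards with the same recursive 0.9/0.1 update before handing them to `plot`. For reference, the same smoothing as a standalone function; the example rewards are made up for illustration.

```python
def moving_average(rewards, beta=0.9):
    # Same recursion as in train()/eval(): MA_t = beta * MA_{t-1} + (1 - beta) * r_t,
    # seeded with the first episode's reward.
    ma = []
    for r in rewards:
        ma.append(r if not ma else beta * ma[-1] + (1 - beta) * r)
    return ma

print(moving_average([-100, -20, -13, -13]))  # [-100, -92.0, -84.1, -76.99]
```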
diff --git a/codes/Q-learning/result/20201124-201903/moving_average_rewards_train.npy b/codes/Q-learning/result/20201124-201903/moving_average_rewards_train.npy
deleted file mode 100644
index 3804bd9..0000000
Binary files a/codes/Q-learning/result/20201124-201903/moving_average_rewards_train.npy and /dev/null differ
diff --git a/codes/Q-learning/result/20201124-201903/rewards_train.npy b/codes/Q-learning/result/20201124-201903/rewards_train.npy
deleted file mode 100644
index 3029df3..0000000
Binary files a/codes/Q-learning/result/20201124-201903/rewards_train.npy and /dev/null differ
diff --git a/codes/Q-learning/result/20210105-094149/moving_average_rewards_train.npy b/codes/Q-learning/result/20210105-094149/moving_average_rewards_train.npy
deleted file mode 100644
index f3df3ea..0000000
Binary files a/codes/Q-learning/result/20210105-094149/moving_average_rewards_train.npy and /dev/null differ
diff --git a/codes/Q-learning/result/20210105-094149/rewards_train.npy b/codes/Q-learning/result/20210105-094149/rewards_train.npy
deleted file mode 100644
index 3db7133..0000000
Binary files a/codes/Q-learning/result/20210105-094149/rewards_train.npy and /dev/null differ
diff --git a/codes/Q-learning/result/MA_rewards_train.npy b/codes/Q-learning/result/MA_rewards_train.npy
deleted file mode 100644
index 87d60a9..0000000
Binary files a/codes/Q-learning/result/MA_rewards_train.npy and /dev/null differ
diff --git a/codes/Q-learning/result/Q_table.npy b/codes/Q-learning/result/Q_table.npy
deleted file mode 100644
index efc9546..0000000
Binary files a/codes/Q-learning/result/Q_table.npy and /dev/null differ
diff --git a/codes/Q-learning/result/moving_average_rewards.png b/codes/Q-learning/result/moving_average_rewards.png
deleted file mode 100644
index 2b28aa5..0000000
Binary files a/codes/Q-learning/result/moving_average_rewards.png and /dev/null differ
diff --git a/codes/Q-learning/result/moving_average_rewards_train.png b/codes/Q-learning/result/moving_average_rewards_train.png
deleted file mode 100644
index 6ba9b6a..0000000
Binary files a/codes/Q-learning/result/moving_average_rewards_train.png and /dev/null differ
diff --git a/codes/Q-learning/result/rewards.png b/codes/Q-learning/result/rewards.png
deleted file mode 100644
index 9fe320e..0000000
Binary files a/codes/Q-learning/result/rewards.png and /dev/null differ
diff --git a/codes/Q-learning/result/rewards_train.npy b/codes/Q-learning/result/rewards_train.npy
deleted file mode 100644
index ce76321..0000000
Binary files a/codes/Q-learning/result/rewards_train.npy and /dev/null differ
diff --git a/codes/Q-learning/result/steps.png b/codes/Q-learning/result/steps.png
deleted file mode 100644
index 5e0c667..0000000
Binary files a/codes/Q-learning/result/steps.png and /dev/null differ
diff --git a/codes/Q-learning/result/steps_train.npy b/codes/Q-learning/result/steps_train.npy
deleted file mode 100644
index b11ef15..0000000
Binary files a/codes/Q-learning/result/steps_train.npy and /dev/null differ
diff --git a/codes/Q-learning/saved_model/20201124-201903/checkpoint.npy b/codes/Q-learning/saved_model/20201124-201903/checkpoint.npy
deleted file mode 100644
index 0a49f57..0000000
Binary files a/codes/Q-learning/saved_model/20201124-201903/checkpoint.npy and /dev/null differ
diff --git a/codes/Q-learning/saved_model/20210105-094149/checkpoint.npy b/codes/Q-learning/saved_model/20210105-094149/checkpoint.npy
deleted file mode 100644
index 0a49f57..0000000
Binary files a/codes/Q-learning/saved_model/20210105-094149/checkpoint.npy and /dev/null differ
diff --git a/codes/Q-learning/saved_model/checkpoint.npy b/codes/Q-learning/saved_model/checkpoint.npy
deleted file mode 100644
index 0a49f57..0000000
Binary files a/codes/Q-learning/saved_model/checkpoint.npy and /dev/null differ
diff --git a/codes/Q-learning/utils.py b/codes/Q-learning/utils.py
deleted file mode 100644
index f6fe640..0000000
--- a/codes/Q-learning/utils.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/usr/bin/env python
-# coding=utf-8
-'''
-Author: John
-Email: johnjim0816@gmail.com
-Date: 2020-11-24 19:50:18
-LastEditor: John
-LastEditTime: 2020-11-24 20:20:46
-Description:
-Environment:
-'''
-import os
-import numpy as np
-
-
-def save_results(rewards, moving_average_rewards, tag='train', result_path='./result'):
-    '''Save the rewards and related results
-    '''
-    if not os.path.exists(result_path):  # check whether the folder exists
-        os.mkdir(result_path)
-    np.save(result_path + 'rewards_' + tag + '.npy', rewards)
-    np.save(result_path + 'moving_average_rewards_' + tag + '.npy', moving_average_rewards)
-    print('results saved!')
-
-def save_model(agent, model_path='./saved_model'):
-    if not os.path.exists(model_path):  # check whether the folder exists
-        os.mkdir(model_path)
-    agent.save_model(model_path + 'checkpoint')
-    print('model saved!')
\ No newline at end of file
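For completeness, a minimal sketch of how a Q-table checkpoint saved by this code could be loaded back and rolled out greedily. It assumes the deleted agent.py above is still importable and uses the repo's saved_model/checkpoint.npy path only as an example; it is not part of the original files.

```python
import gym
from agent import QLearning  # the deleted agent.py above

env = gym.make("CliffWalking-v0")
agent = QLearning(obs_dim=env.observation_space.n, action_dim=env.action_space.n)
agent.load_model("saved_model/checkpoint.npy")  # example path; adjust to your run

obs, total_reward, done = env.reset(), 0, False
while not done:
    obs, reward, done, _ = env.step(agent.predict(obs))  # greedy, no exploration
    total_reward += reward
print(total_reward)  # a well-trained Q-table should reach -13
```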