update
This commit is contained in:
@@ -1,40 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# coding=utf-8
|
||||
'''
|
||||
@Author: John
|
||||
@Email: johnjim0816@gmail.com
|
||||
@Date: 2020-06-10 15:27:16
|
||||
@LastEditor: John
|
||||
LastEditTime: 2021-01-20 18:58:37
|
||||
@Description:
|
||||
@Environment: python 3.7.7
|
||||
'''
|
||||
import random
|
||||
|
||||
class ReplayBuffer:
    """Fixed-capacity experience replay memory.

    Transitions are stored as ``(state, action, reward, next_state, done)``
    tuples. Once ``capacity`` transitions are stored, each new transition
    overwrites the oldest one (ring-buffer behaviour).
    """

    def __init__(self, capacity):
        """Create an empty buffer.

        Args:
            capacity: Maximum number of transitions kept in the buffer.

        Raises:
            ValueError: If ``capacity`` is not positive. Without this check
                the failure would only show up on the first ``push`` as an
                ``IndexError`` or ``ZeroDivisionError``.
        """
        if capacity <= 0:
            raise ValueError(
                "capacity must be a positive integer, got {!r}".format(capacity)
            )
        self.capacity = capacity  # maximum number of stored transitions
        self.buffer = []
        self.position = 0  # index of the slot the next push writes to

    def push(self, state, action, reward, next_state, done):
        """Store one transition, overwriting the oldest one when full."""
        if len(self.buffer) < self.capacity:
            # Still filling up: grow the list so the slot at `position` exists.
            self.buffer.append(None)
        self.buffer[self.position] = (state, action, reward, next_state, done)
        # Wrap around so that writes cycle through the slots.
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        """Draw ``batch_size`` distinct transitions uniformly at random.

        Returns:
            A 5-tuple ``(state, action, reward, next_state, done)`` where each
            element is a tuple of length ``batch_size``.

        Raises:
            ValueError: If ``batch_size`` exceeds the number of stored
                transitions (raised by ``random.sample``), or if it is 0
                (nothing to unpack).
        """
        batch = random.sample(self.buffer, batch_size)
        # Transpose a list of transitions into per-field tuples.
        state, action, reward, next_state, done = zip(*batch)
        return state, action, reward, next_state, done

    def __len__(self):
        """Return the number of transitions currently stored."""
        return len(self.buffer)
|
||||
|
||||
@@ -1,30 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# coding=utf-8
|
||||
'''
|
||||
@Author: John
|
||||
@Email: johnjim0816@gmail.com
|
||||
@Date: 2020-06-12 00:47:02
|
||||
@LastEditor: John
|
||||
LastEditTime: 2020-08-19 16:55:54
|
||||
@Description:
|
||||
@Environment: python 3.7.7
|
||||
'''
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
class MLP(nn.Module):
    """Fully connected Q-network with two hidden ReLU layers."""

    def __init__(self, n_states=4, n_actions=18, hidden_dim=128):
        """Build the three linear layers.

        Args:
            n_states: Size of the input feature vector (the environment
                state dimension).
            n_actions: Number of outputs, one per action.
            hidden_dim: Width of both hidden layers. Defaults to 128, the
                value that was previously hard-coded.
        """
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(n_states, hidden_dim)  # input layer
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)  # hidden layer
        self.fc3 = nn.Linear(hidden_dim, n_actions)  # output layer

    def forward(self, x):
        """Return the output of the final linear layer for input ``x``.

        ReLU is applied after the first two layers; the last layer is left
        linear.
        """
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)
|
||||
@@ -1,51 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# coding=utf-8
|
||||
'''
|
||||
Author: John
|
||||
Email: johnjim0816@gmail.com
|
||||
Date: 2020-12-22 15:22:17
|
||||
LastEditor: John
|
||||
LastEditTime: 2021-01-21 14:30:38
|
||||
Description:
|
||||
Environment:
|
||||
'''
|
||||
import datetime
|
||||
import os
|
||||
import argparse
|
||||
|
||||
# Display name of the algorithm.
ALGO_NAME = 'Double DQN'

# Timestamp taken once at import time; it names the per-run output folders.
SEQUENCE = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

# Directory that contains this module; every output path hangs off it.
_MODULE_DIR = os.path.dirname(os.path.abspath(__file__))

# Per-run output directories (note the trailing slash on these two).
SAVED_MODEL_PATH = _MODULE_DIR + "/saved_model/" + SEQUENCE + '/'
RESULT_PATH = _MODULE_DIR + "/results/" + SEQUENCE + '/'

# Per-run log directories (no trailing slash).
TRAIN_LOG_DIR = _MODULE_DIR + "/logs/train/" + SEQUENCE
EVAL_LOG_DIR = _MODULE_DIR + "/logs/eval/" + SEQUENCE
|
||||
|
||||
def get_args(argv=None):
    """Parse the command-line options for training and evaluation.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) ``sys.argv[1:]`` is parsed, as before. Passing an
            explicit list allows calling this from tests, notebooks or other
            entry points without touching the process arguments.

    Returns:
        argparse.Namespace holding one attribute per option below.
    """
    parser = argparse.ArgumentParser()
    # 1 means train, 0 means evaluation only.
    parser.add_argument("--train", default=1, type=int)
    # Gamma used in Q-learning.
    parser.add_argument("--gamma", default=0.99, type=float)
    # Epsilon parameters for epsilon-greedy action selection.
    parser.add_argument("--epsilon_start", default=0.95, type=float)
    parser.add_argument("--epsilon_end", default=0.01, type=float)
    parser.add_argument("--epsilon_decay", default=500, type=float)
    parser.add_argument("--policy_lr", default=0.01, type=float)
    parser.add_argument("--memory_capacity", default=1000,
                        type=int, help="capacity of Replay Memory")

    parser.add_argument("--batch_size", default=32, type=int,
                        help="batch size of memory sampling")
    # Maximum number of training episodes.
    parser.add_argument("--train_eps", default=200, type=int)
    parser.add_argument("--train_steps", default=200, type=int)
    # Target-network update frequency.
    parser.add_argument("--target_update", default=2, type=int,
                        help="when(every default 2 eisodes) to update target net ")

    # Evaluation counterparts of --train_eps and --train_steps (by name).
    parser.add_argument("--eval_eps", default=100, type=int)
    parser.add_argument("--eval_steps", default=200, type=int)
    config = parser.parse_args(argv)

    return config
|
||||
@@ -1,48 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# coding=utf-8
|
||||
'''
|
||||
@Author: John
|
||||
@Email: johnjim0816@gmail.com
|
||||
@Date: 2020-06-11 16:30:09
|
||||
@LastEditor: John
|
||||
LastEditTime: 2020-12-22 15:24:31
|
||||
@Description:
|
||||
@Environment: python 3.7.7
|
||||
'''
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
import numpy as np
|
||||
import os
|
||||
from params import ALGO_NAME
|
||||
def plot(item, ylabel='rewards_train', save_fig=True):
    """Draw ``item`` as a seaborn-styled line chart against its index.

    Args:
        item: Sequence of values to plot, one per episode.
        ylabel: Label of the y axis; also used in the figure title and as
            the file name of the saved figure.
        save_fig: When true, save the figure as ``results/<ylabel>.png``
            next to this module before showing it.
    """
    sns.set()
    plt.figure()
    plt.plot(np.arange(len(item)), item)
    plt.title(ylabel+' of '+ALGO_NAME)
    plt.ylabel(ylabel)
    plt.xlabel('episodes')
    if save_fig:
        # Resolve the module directory from the absolute path: a relative
        # __file__ has an empty dirname, which would make the target
        # "/results/..." at the filesystem root.
        results_dir = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), "results")
        # savefig does not create missing directories.
        os.makedirs(results_dir, exist_ok=True)
        plt.savefig(os.path.join(results_dir, ylabel + ".png"))
    # NOTE(review): source indentation was lost; show() is assumed to run
    # regardless of save_fig — confirm against the original file.
    plt.show()
|
||||
|
||||
|
||||
# plt.show()
|
||||
if __name__ == "__main__":
    # Directory holding the saved .npy curves, next to this script.
    output_path = os.path.split(os.path.abspath(__file__))[0]+"/results/"

    # Same three curves for the training run, then the evaluation run.
    for tag in ('train', 'eval'):
        rewards = np.load(output_path+"rewards_"+tag+".npy")
        moving_average_rewards = np.load(
            output_path+"moving_average_rewards_"+tag+".npy")
        steps = np.load(output_path+"steps_"+tag+".npy")

        plot(rewards, ylabel='rewards_'+tag)
        plot(moving_average_rewards, ylabel='moving_average_rewards_'+tag)
        plot(steps, ylabel='steps_'+tag)
|
||||
Binary file not shown.
Binary file not shown.
|
Before Width: | Height: | Size: 74 KiB |
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user