{ "metadata": { "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.10-final" }, "orig_nbformat": 2, "kernelspec": { "name": "python3", "display_name": "Python 3", "language": "python" } }, "nbformat": 4, "nbformat_minor": 2, "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import sys,os\n", "from pathlib import Path\n", "curr_path = str(Path().absolute())\n", "parent_path = str(Path().absolute().parent)\n", "sys.path.append(parent_path) # add current terminal path to sys.path\n", "import gym\n", "import torch\n", "import datetime\n", "from DQN.agent import DQN\n", "from common.plot import plot_rewards\n", "from common.utils import save_results" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "SEQUENCE = datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\") # 获取当前时间\n", "SAVED_MODEL_PATH = curr_path+\"/saved_model/\"+SEQUENCE+'/' # 生成保存的模型路径\n", "if not os.path.exists(curr_path+\"/saved_model/\"): # 检测是否存在文件夹\n", " os.mkdir(curr_path+\"/saved_model/\")\n", "if not os.path.exists(SAVED_MODEL_PATH): # 检测是否存在文件夹\n", " os.mkdir(SAVED_MODEL_PATH)\n", "RESULT_PATH = curr_path+\"/results/\"+SEQUENCE+'/' # 存储reward的路径\n", "if not os.path.exists(curr_path+\"/results/\"): # 检测是否存在文件夹\n", " os.mkdir(curr_path+\"/results/\")\n", "if not os.path.exists(RESULT_PATH): # 检测是否存在文件夹\n", " os.mkdir(RESULT_PATH)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "class DQNConfig:\n", " def __init__(self):\n", " self.algo = \"DQN\" # 算法名称\n", " self.gamma = 0.99\n", " self.epsilon_start = 0.95 # e-greedy策略的初始epsilon\n", " self.epsilon_end = 0.01\n", " self.epsilon_decay = 200\n", " self.lr = 0.01 # 学习率\n", " self.memory_capacity = 800 # Replay Memory容量\n", " self.batch_size = 64\n", " self.train_eps = 300 # 训练的episode数目\n", " self.train_steps = 200 # 训练每个episode的最大长度\n", " self.target_update = 2 # target net的更新频率\n", " self.eval_eps = 20 # 测试的episode数目\n", " self.eval_steps = 200 # 测试每个episode的最大长度\n", " self.device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\") # 检测gpu\n", " self.hidden_dim = 128 # 神经网络隐藏层维度" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "def train(cfg,env,agent):\n", " print('Start to train !')\n", " rewards = []\n", " ma_rewards = [] # 滑动平均的reward\n", " ep_steps = []\n", " for i_episode in range(cfg.train_eps):\n", " state = env.reset() # reset环境状态\n", " ep_reward = 0\n", " for i_step in range(cfg.train_steps):\n", " action = agent.choose_action(state) # 根据当前环境state选择action\n", " next_state, reward, done, _ = env.step(action) # 更新环境参数\n", " ep_reward += reward\n", " agent.memory.push(state, action, reward, next_state, done) # 将state等这些transition存入memory\n", " state = next_state # 跳转到下一个状态\n", " agent.update() # 每步更新网络\n", " if done:\n", " break\n", " # 更新target network,复制DQN中的所有weights and biases\n", " if i_episode % cfg.target_update == 0:\n", " agent.target_net.load_state_dict(agent.policy_net.state_dict())\n", " print('Episode:{}/{}, Reward:{}, Steps:{}, Done:{}'.format(i_episode+1,cfg.train_eps,ep_reward,i_step+1,done))\n", " ep_steps.append(i_step)\n", " rewards.append(ep_reward)\n", " # 计算滑动窗口的reward\n", " if ma_rewards:\n", " ma_rewards.append(\n", " 0.9*ma_rewards[-1]+0.1*ep_reward)\n", " else:\n", " ma_rewards.append(ep_reward) \n", " print('Complete training!')\n", " return rewards,ma_rewards" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "tags": [] }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Start to train !\n", "Episode:1/300, Reward:41.0, Steps:41, Done:True\n", "Episode:2/300, Reward:23.0, Steps:23, Done:True\n", "Episode:3/300, Reward:19.0, Steps:19, Done:True\n", "Episode:4/300, Reward:17.0, Steps:17, Done:True\n", "Episode:5/300, Reward:14.0, Steps:14, Done:True\n", "Episode:6/300, Reward:15.0, Steps:15, Done:True\n", "Episode:7/300, Reward:10.0, Steps:10, Done:True\n", "Episode:8/300, Reward:23.0, Steps:23, Done:True\n", "Episode:9/300, Reward:14.0, Steps:14, Done:True\n", "Episode:10/300, Reward:9.0, Steps:9, Done:True\n", "Episode:11/300, Reward:9.0, Steps:9, Done:True\n", "Episode:12/300, Reward:9.0, Steps:9, Done:True\n", "Episode:13/300, Reward:9.0, Steps:9, Done:True\n", "Episode:14/300, Reward:10.0, Steps:10, Done:True\n", "Episode:15/300, Reward:10.0, Steps:10, Done:True\n", "Episode:16/300, Reward:12.0, Steps:12, Done:True\n", "Episode:17/300, Reward:10.0, Steps:10, Done:True\n", "Episode:18/300, Reward:10.0, Steps:10, Done:True\n", "Episode:19/300, Reward:9.0, Steps:9, Done:True\n", "Episode:20/300, Reward:10.0, Steps:10, Done:True\n", "Episode:21/300, Reward:8.0, Steps:8, Done:True\n", "Episode:22/300, Reward:10.0, Steps:10, Done:True\n", "Episode:23/300, Reward:10.0, Steps:10, Done:True\n", "Episode:24/300, Reward:13.0, Steps:13, Done:True\n", "Episode:25/300, Reward:10.0, Steps:10, Done:True\n", "Episode:26/300, Reward:9.0, Steps:9, Done:True\n", "Episode:27/300, Reward:9.0, Steps:9, Done:True\n", "Episode:28/300, Reward:9.0, Steps:9, Done:True\n", "Episode:29/300, Reward:12.0, Steps:12, Done:True\n", "Episode:30/300, Reward:9.0, Steps:9, Done:True\n", "Episode:31/300, Reward:9.0, Steps:9, Done:True\n", "Episode:32/300, Reward:10.0, Steps:10, Done:True\n", "Episode:33/300, Reward:11.0, Steps:11, Done:True\n", "Episode:34/300, Reward:12.0, Steps:12, Done:True\n", "Episode:35/300, Reward:8.0, Steps:8, Done:True\n", "Episode:36/300, Reward:10.0, Steps:10, Done:True\n", "Episode:37/300, Reward:9.0, Steps:9, Done:True\n", "Episode:38/300, Reward:10.0, Steps:10, Done:True\n", "Episode:39/300, Reward:10.0, Steps:10, Done:True\n", "Episode:40/300, Reward:10.0, Steps:10, Done:True\n", "Episode:41/300, Reward:9.0, Steps:9, Done:True\n", "Episode:42/300, Reward:10.0, Steps:10, Done:True\n", "Episode:43/300, Reward:10.0, Steps:10, Done:True\n", "Episode:44/300, Reward:10.0, Steps:10, Done:True\n", "Episode:45/300, Reward:9.0, Steps:9, Done:True\n", "Episode:46/300, Reward:22.0, Steps:22, Done:True\n", "Episode:47/300, Reward:74.0, Steps:74, Done:True\n", "Episode:48/300, Reward:13.0, Steps:13, Done:True\n", "Episode:49/300, Reward:29.0, Steps:29, Done:True\n", "Episode:50/300, Reward:56.0, Steps:56, Done:True\n", "Episode:51/300, Reward:74.0, Steps:74, Done:True\n", "Episode:52/300, Reward:85.0, Steps:85, Done:True\n", "Episode:53/300, Reward:72.0, Steps:72, Done:True\n", "Episode:54/300, Reward:114.0, Steps:114, Done:True\n", "Episode:55/300, Reward:97.0, Steps:97, Done:True\n", "Episode:56/300, Reward:101.0, Steps:101, Done:True\n", "Episode:57/300, Reward:104.0, Steps:104, Done:True\n", "Episode:58/300, Reward:58.0, Steps:58, Done:True\n", "Episode:59/300, Reward:11.0, Steps:11, Done:True\n", "Episode:60/300, Reward:56.0, Steps:56, Done:True\n", "Episode:61/300, Reward:74.0, Steps:74, Done:True\n", "Episode:62/300, Reward:51.0, Steps:51, Done:True\n", "Episode:63/300, Reward:113.0, Steps:113, Done:True\n", "Episode:64/300, Reward:48.0, Steps:48, Done:True\n", "Episode:65/300, Reward:97.0, Steps:97, Done:True\n", "Episode:66/300, Reward:59.0, Steps:59, Done:True\n", "Episode:67/300, Reward:200.0, Steps:200, Done:True\n", "Episode:68/300, Reward:67.0, Steps:67, Done:True\n", "Episode:69/300, Reward:200.0, Steps:200, Done:True\n", "Episode:70/300, Reward:45.0, Steps:45, Done:True\n", "Episode:71/300, Reward:48.0, Steps:48, Done:True\n", "Episode:72/300, Reward:90.0, Steps:90, Done:True\n", "Episode:73/300, Reward:47.0, Steps:47, Done:True\n", "Episode:74/300, Reward:94.0, Steps:94, Done:True\n", "Episode:75/300, Reward:107.0, Steps:107, Done:True\n", "Episode:76/300, Reward:12.0, Steps:12, Done:True\n", "Episode:77/300, Reward:30.0, Steps:30, Done:True\n", "Episode:78/300, Reward:62.0, Steps:62, Done:True\n", "Episode:79/300, Reward:64.0, Steps:64, Done:True\n", "Episode:80/300, Reward:41.0, Steps:41, Done:True\n", "Episode:81/300, Reward:67.0, Steps:67, Done:True\n", "Episode:82/300, Reward:45.0, Steps:45, Done:True\n", "Episode:83/300, Reward:130.0, Steps:130, Done:True\n", "Episode:84/300, Reward:50.0, Steps:50, Done:True\n", "Episode:85/300, Reward:51.0, Steps:51, Done:True\n", "Episode:86/300, Reward:67.0, Steps:67, Done:True\n", "Episode:87/300, Reward:37.0, Steps:37, Done:True\n", "Episode:88/300, Reward:41.0, Steps:41, Done:True\n", "Episode:89/300, Reward:54.0, Steps:54, Done:True\n", "Episode:90/300, Reward:93.0, Steps:93, Done:True\n", "Episode:91/300, Reward:71.0, Steps:71, Done:True\n", "Episode:92/300, Reward:102.0, Steps:102, Done:True\n", "Episode:93/300, Reward:55.0, Steps:55, Done:True\n", "Episode:94/300, Reward:73.0, Steps:73, Done:True\n", "Episode:95/300, Reward:61.0, Steps:61, Done:True\n", "Episode:96/300, Reward:16.0, Steps:16, Done:True\n", "Episode:97/300, Reward:61.0, Steps:61, Done:True\n", "Episode:98/300, Reward:79.0, Steps:79, Done:True\n", "Episode:99/300, Reward:76.0, Steps:76, Done:True\n", "Episode:100/300, Reward:32.0, Steps:32, Done:True\n", "Episode:101/300, Reward:95.0, Steps:95, Done:True\n", "Episode:102/300, Reward:83.0, Steps:83, Done:True\n", "Episode:103/300, Reward:41.0, Steps:41, Done:True\n", "Episode:104/300, Reward:30.0, Steps:30, Done:True\n", "Episode:105/300, Reward:83.0, Steps:83, Done:True\n", "Episode:106/300, Reward:95.0, Steps:95, Done:True\n", "Episode:107/300, Reward:104.0, Steps:104, Done:True\n", "Episode:108/300, Reward:98.0, Steps:98, Done:True\n", "Episode:109/300, Reward:109.0, Steps:109, Done:True\n", "Episode:110/300, Reward:63.0, Steps:63, Done:True\n", "Episode:111/300, Reward:98.0, Steps:98, Done:True\n", "Episode:112/300, Reward:105.0, Steps:105, Done:True\n", "Episode:113/300, Reward:99.0, Steps:99, Done:True\n", "Episode:114/300, Reward:200.0, Steps:200, Done:True\n", "Episode:115/300, Reward:200.0, Steps:200, Done:True\n", "Episode:116/300, Reward:47.0, Steps:47, Done:True\n", "Episode:117/300, Reward:98.0, Steps:98, Done:True\n", "Episode:118/300, Reward:200.0, Steps:200, Done:True\n", "Episode:119/300, Reward:52.0, Steps:52, Done:True\n", "Episode:120/300, Reward:55.0, Steps:55, Done:True\n", "Episode:121/300, Reward:200.0, Steps:200, Done:True\n", "Episode:122/300, Reward:200.0, Steps:200, Done:True\n", "Episode:123/300, Reward:200.0, Steps:200, Done:True\n", "Episode:124/300, Reward:200.0, Steps:200, Done:True\n", "Episode:125/300, Reward:200.0, Steps:200, Done:True\n", "Episode:126/300, Reward:40.0, Steps:40, Done:True\n", "Episode:127/300, Reward:42.0, Steps:42, Done:True\n", "Episode:128/300, Reward:101.0, Steps:101, Done:True\n", "Episode:129/300, Reward:200.0, Steps:200, Done:True\n", "Episode:130/300, Reward:70.0, Steps:70, Done:True\n", "Episode:131/300, Reward:175.0, Steps:175, Done:True\n", "Episode:132/300, Reward:90.0, Steps:90, Done:True\n", "Episode:133/300, Reward:81.0, Steps:81, Done:True\n", "Episode:134/300, Reward:61.0, Steps:61, Done:True\n", "Episode:135/300, Reward:74.0, Steps:74, Done:True\n", "Episode:136/300, Reward:68.0, Steps:68, Done:True\n", "Episode:137/300, Reward:50.0, Steps:50, Done:True\n", "Episode:138/300, Reward:51.0, Steps:51, Done:True\n", "Episode:139/300, Reward:99.0, Steps:99, Done:True\n", "Episode:140/300, Reward:87.0, Steps:87, Done:True\n", "Episode:141/300, Reward:94.0, Steps:94, Done:True\n", "Episode:142/300, Reward:51.0, Steps:51, Done:True\n", "Episode:143/300, Reward:200.0, Steps:200, Done:True\n", "Episode:144/300, Reward:55.0, Steps:55, Done:True\n", "Episode:145/300, Reward:200.0, Steps:200, Done:True\n", "Episode:146/300, Reward:57.0, Steps:57, Done:True\n", "Episode:147/300, Reward:129.0, Steps:129, Done:True\n", "Episode:148/300, Reward:74.0, Steps:74, Done:True\n", "Episode:149/300, Reward:108.0, Steps:108, Done:True\n", "Episode:150/300, Reward:63.0, Steps:63, Done:True\n", "Episode:151/300, Reward:200.0, Steps:200, Done:True\n", "Episode:152/300, Reward:103.0, Steps:103, Done:True\n", "Episode:153/300, Reward:129.0, Steps:129, Done:True\n", "Episode:154/300, Reward:77.0, Steps:77, Done:True\n", "Episode:155/300, Reward:129.0, Steps:129, Done:True\n", "Episode:156/300, Reward:200.0, Steps:200, Done:True\n", "Episode:157/300, Reward:181.0, Steps:181, Done:True\n", "Episode:158/300, Reward:200.0, Steps:200, Done:True\n", "Episode:159/300, Reward:136.0, Steps:136, Done:True\n", "Episode:160/300, Reward:200.0, Steps:200, Done:True\n", "Episode:161/300, Reward:181.0, Steps:181, Done:True\n", "Episode:162/300, Reward:120.0, Steps:120, Done:True\n", "Episode:163/300, Reward:190.0, Steps:190, Done:True\n", "Episode:164/300, Reward:200.0, Steps:200, Done:True\n", "Episode:165/300, Reward:200.0, Steps:200, Done:True\n", "Episode:166/300, Reward:200.0, Steps:200, Done:True\n", "Episode:167/300, Reward:200.0, Steps:200, Done:True\n", "Episode:168/300, Reward:200.0, Steps:200, Done:True\n", "Episode:169/300, Reward:200.0, Steps:200, Done:True\n", "Episode:170/300, Reward:89.0, Steps:89, Done:True\n", "Episode:171/300, Reward:74.0, Steps:74, Done:True\n", "Episode:172/300, Reward:200.0, Steps:200, Done:True\n", "Episode:173/300, Reward:200.0, Steps:200, Done:True\n", "Episode:174/300, Reward:200.0, Steps:200, Done:True\n", "Episode:175/300, Reward:200.0, Steps:200, Done:True\n", "Episode:176/300, Reward:93.0, Steps:93, Done:True\n", "Episode:177/300, Reward:139.0, Steps:139, Done:True\n", "Episode:178/300, Reward:78.0, Steps:78, Done:True\n", "Episode:179/300, Reward:200.0, Steps:200, Done:True\n", "Episode:180/300, Reward:200.0, Steps:200, Done:True\n", "Episode:181/300, Reward:200.0, Steps:200, Done:True\n", "Episode:182/300, Reward:200.0, Steps:200, Done:True\n", "Episode:183/300, Reward:200.0, Steps:200, Done:True\n", "Episode:184/300, Reward:200.0, Steps:200, Done:True\n", "Episode:185/300, Reward:200.0, Steps:200, Done:True\n", "Episode:186/300, Reward:200.0, Steps:200, Done:True\n", "Episode:187/300, Reward:200.0, Steps:200, Done:True\n", "Episode:188/300, Reward:200.0, Steps:200, Done:True\n", "Episode:189/300, Reward:200.0, Steps:200, Done:True\n", "Episode:190/300, Reward:200.0, Steps:200, Done:True\n", "Episode:191/300, Reward:200.0, Steps:200, Done:True\n", "Episode:192/300, Reward:200.0, Steps:200, Done:True\n", "Episode:193/300, Reward:190.0, Steps:190, Done:True\n", "Episode:194/300, Reward:200.0, Steps:200, Done:True\n", "Episode:195/300, Reward:200.0, Steps:200, Done:True\n", "Episode:196/300, Reward:200.0, Steps:200, Done:True\n", "Episode:197/300, Reward:200.0, Steps:200, Done:True\n", "Episode:198/300, Reward:200.0, Steps:200, Done:True\n", "Episode:199/300, Reward:200.0, Steps:200, Done:True\n", "Episode:200/300, Reward:200.0, Steps:200, Done:True\n", "Episode:201/300, Reward:200.0, Steps:200, Done:True\n", "Episode:202/300, Reward:200.0, Steps:200, Done:True\n", "Episode:203/300, Reward:67.0, Steps:67, Done:True\n", "Episode:204/300, Reward:200.0, Steps:200, Done:True\n", "Episode:205/300, Reward:200.0, Steps:200, Done:True\n", "Episode:206/300, Reward:200.0, Steps:200, Done:True\n", "Episode:207/300, Reward:200.0, Steps:200, Done:True\n", "Episode:208/300, Reward:200.0, Steps:200, Done:True\n", "Episode:209/300, Reward:200.0, Steps:200, Done:True\n", "Episode:210/300, Reward:200.0, Steps:200, Done:True\n", "Episode:211/300, Reward:200.0, Steps:200, Done:True\n", "Episode:212/300, Reward:200.0, Steps:200, Done:True\n", "Episode:213/300, Reward:200.0, Steps:200, Done:True\n", "Episode:214/300, Reward:200.0, Steps:200, Done:True\n", "Episode:215/300, Reward:200.0, Steps:200, Done:True\n", "Episode:216/300, Reward:200.0, Steps:200, Done:True\n", "Episode:217/300, Reward:200.0, Steps:200, Done:True\n", "Episode:218/300, Reward:44.0, Steps:44, Done:True\n", "Episode:219/300, Reward:200.0, Steps:200, Done:True\n", "Episode:220/300, Reward:200.0, Steps:200, Done:True\n", "Episode:221/300, Reward:200.0, Steps:200, Done:True\n", "Episode:222/300, Reward:200.0, Steps:200, Done:True\n", "Episode:223/300, Reward:200.0, Steps:200, Done:True\n", "Episode:224/300, Reward:200.0, Steps:200, Done:True\n", "Episode:225/300, Reward:200.0, Steps:200, Done:True\n", "Episode:226/300, Reward:200.0, Steps:200, Done:True\n", "Episode:227/300, Reward:200.0, Steps:200, Done:True\n", "Episode:228/300, Reward:200.0, Steps:200, Done:True\n", "Episode:229/300, Reward:200.0, Steps:200, Done:True\n", "Episode:230/300, Reward:200.0, Steps:200, Done:True\n", "Episode:231/300, Reward:200.0, Steps:200, Done:True\n", "Episode:232/300, Reward:200.0, Steps:200, Done:True\n", "Episode:233/300, Reward:200.0, Steps:200, Done:True\n", "Episode:234/300, Reward:200.0, Steps:200, Done:True\n", "Episode:235/300, Reward:200.0, Steps:200, Done:True\n", "Episode:236/300, Reward:200.0, Steps:200, Done:True\n", "Episode:237/300, Reward:200.0, Steps:200, Done:True\n", "Episode:238/300, Reward:200.0, Steps:200, Done:True\n", "Episode:239/300, Reward:200.0, Steps:200, Done:True\n", "Episode:240/300, Reward:200.0, Steps:200, Done:True\n", "Episode:241/300, Reward:200.0, Steps:200, Done:True\n", "Episode:242/300, Reward:126.0, Steps:126, Done:True\n", "Episode:243/300, Reward:200.0, Steps:200, Done:True\n", "Episode:244/300, Reward:200.0, Steps:200, Done:True\n", "Episode:245/300, Reward:200.0, Steps:200, Done:True\n", "Episode:246/300, Reward:200.0, Steps:200, Done:True\n", "Episode:247/300, Reward:200.0, Steps:200, Done:True\n", "Episode:248/300, Reward:118.0, Steps:118, Done:True\n", "Episode:249/300, Reward:200.0, Steps:200, Done:True\n", "Episode:250/300, Reward:200.0, Steps:200, Done:True\n", "Episode:251/300, Reward:99.0, Steps:99, Done:True\n", "Episode:252/300, Reward:145.0, Steps:145, Done:True\n", "Episode:253/300, Reward:200.0, Steps:200, Done:True\n", "Episode:254/300, Reward:200.0, Steps:200, Done:True\n", "Episode:255/300, Reward:200.0, Steps:200, Done:True\n", "Episode:256/300, Reward:200.0, Steps:200, Done:True\n", "Episode:257/300, Reward:130.0, Steps:130, Done:True\n", "Episode:258/300, Reward:170.0, Steps:170, Done:True\n", "Episode:259/300, Reward:200.0, Steps:200, Done:True\n", "Episode:260/300, Reward:200.0, Steps:200, Done:True\n", "Episode:261/300, Reward:200.0, Steps:200, Done:True\n", "Episode:262/300, Reward:200.0, Steps:200, Done:True\n", "Episode:263/300, Reward:200.0, Steps:200, Done:True\n", "Episode:264/300, Reward:200.0, Steps:200, Done:True\n", "Episode:265/300, Reward:200.0, Steps:200, Done:True\n", "Episode:266/300, Reward:200.0, Steps:200, Done:True\n", "Episode:267/300, Reward:200.0, Steps:200, Done:True\n", "Episode:268/300, Reward:200.0, Steps:200, Done:True\n", "Episode:269/300, Reward:200.0, Steps:200, Done:True\n", "Episode:270/300, Reward:200.0, Steps:200, Done:True\n", "Episode:271/300, Reward:200.0, Steps:200, Done:True\n", "Episode:272/300, Reward:135.0, Steps:135, Done:True\n", "Episode:273/300, Reward:200.0, Steps:200, Done:True\n", "Episode:274/300, Reward:200.0, Steps:200, Done:True\n", "Episode:275/300, Reward:200.0, Steps:200, Done:True\n", "Episode:276/300, Reward:200.0, Steps:200, Done:True\n", "Episode:277/300, Reward:200.0, Steps:200, Done:True\n", "Episode:278/300, Reward:200.0, Steps:200, Done:True\n", "Episode:279/300, Reward:200.0, Steps:200, Done:True\n", "Episode:280/300, Reward:200.0, Steps:200, Done:True\n", "Episode:281/300, Reward:200.0, Steps:200, Done:True\n", "Episode:282/300, Reward:200.0, Steps:200, Done:True\n", "Episode:283/300, Reward:200.0, Steps:200, Done:True\n", "Episode:284/300, Reward:200.0, Steps:200, Done:True\n", "Episode:285/300, Reward:200.0, Steps:200, Done:True\n", "Episode:286/300, Reward:200.0, Steps:200, Done:True\n", "Episode:287/300, Reward:200.0, Steps:200, Done:True\n", "Episode:288/300, Reward:200.0, Steps:200, Done:True\n", "Episode:289/300, Reward:200.0, Steps:200, Done:True\n", "Episode:290/300, Reward:200.0, Steps:200, Done:True\n", "Episode:291/300, Reward:200.0, Steps:200, Done:True\n", "Episode:292/300, Reward:200.0, Steps:200, Done:True\n", "Episode:293/300, Reward:200.0, Steps:200, Done:True\n", "Episode:294/300, Reward:200.0, Steps:200, Done:True\n", "Episode:295/300, Reward:200.0, Steps:200, Done:True\n", "Episode:296/300, Reward:200.0, Steps:200, Done:True\n", "Episode:297/300, Reward:200.0, Steps:200, Done:True\n", "Episode:298/300, Reward:200.0, Steps:200, Done:True\n", "Episode:299/300, Reward:200.0, Steps:200, Done:True\n", "Episode:300/300, Reward:200.0, Steps:200, Done:True\n", "Complete training!\n", "results saved!\n" ] }, { "output_type": "display_data", "data": { "text/plain": "
", "image/svg+xml": "\n\n\n \n \n \n \n 2021-03-29T19:53:51.889101\n image/svg+xml\n \n \n Matplotlib v3.4.0, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", "image/png": "\n" }, "metadata": {} } ], "source": [ "cfg = DQNConfig()\n", "env = gym.make('CartPole-v0')\n", "env.seed(1)\n", "state_dim = env.observation_space.shape[0]\n", "action_dim = env.action_space.n\n", "agent = DQN(state_dim,action_dim,cfg)\n", "rewards,ma_rewards = train(cfg,env,agent)\n", "agent.save(path=SAVED_MODEL_PATH)\n", "save_results(rewards,ma_rewards,tag='train',path=RESULT_PATH)\n", "plot_rewards(rewards,ma_rewards,tag=\"train\",algo = cfg.algo,path=RESULT_PATH)" ] } ] }