diff --git a/codes/PPO/outputs/CartPole-v0/20211231-193837/models/ppo_actor.pt b/codes/PPO/outputs/CartPole-v0/20211231-193837/models/ppo_actor.pt deleted file mode 100644 index 36fa194..0000000 Binary files a/codes/PPO/outputs/CartPole-v0/20211231-193837/models/ppo_actor.pt and /dev/null differ diff --git a/codes/PPO/outputs/CartPole-v0/20211231-193837/models/ppo_critic.pt b/codes/PPO/outputs/CartPole-v0/20211231-193837/models/ppo_critic.pt deleted file mode 100644 index eaf611a..0000000 Binary files a/codes/PPO/outputs/CartPole-v0/20211231-193837/models/ppo_critic.pt and /dev/null differ diff --git a/codes/PPO/outputs/CartPole-v0/20211231-193837/results/test_rewards_curve.png b/codes/PPO/outputs/CartPole-v0/20211231-193837/results/test_rewards_curve.png deleted file mode 100644 index 961f15d..0000000 Binary files a/codes/PPO/outputs/CartPole-v0/20211231-193837/results/test_rewards_curve.png and /dev/null differ diff --git a/codes/PPO/outputs/CartPole-v0/20211231-193837/results/train_ma_rewards.npy b/codes/PPO/outputs/CartPole-v0/20211231-193837/results/train_ma_rewards.npy deleted file mode 100644 index b2254f0..0000000 Binary files a/codes/PPO/outputs/CartPole-v0/20211231-193837/results/train_ma_rewards.npy and /dev/null differ diff --git a/codes/PPO/outputs/CartPole-v0/20211231-193837/results/train_rewards.npy b/codes/PPO/outputs/CartPole-v0/20211231-193837/results/train_rewards.npy deleted file mode 100644 index c67c7d7..0000000 Binary files a/codes/PPO/outputs/CartPole-v0/20211231-193837/results/train_rewards.npy and /dev/null differ diff --git a/codes/PPO/outputs/CartPole-v0/20211231-193837/results/train_rewards_curve.png b/codes/PPO/outputs/CartPole-v0/20211231-193837/results/train_rewards_curve.png deleted file mode 100644 index cf01ae0..0000000 Binary files a/codes/PPO/outputs/CartPole-v0/20211231-193837/results/train_rewards_curve.png and /dev/null differ diff --git a/codes/PPO/task0.py b/codes/PPO/task0.py deleted file mode 100644 index 6a73ff8..0000000 
--- a/codes/PPO/task0.py +++ /dev/null @@ -1,131 +0,0 @@ -import sys,os -curr_path = os.path.dirname(os.path.abspath(__file__)) # 当前文件所在绝对路径 -parent_path = os.path.dirname(curr_path) # 父路径 -sys.path.append(parent_path) # 添加路径到系统路径 - -import gym -import torch -import numpy as np -import datetime -from common.utils import plot_rewards -from common.utils import save_results,make_dir -from ppo2 import PPO - -curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # 获取当前时间 - -class Config: - def __init__(self) -> None: - ################################## 环境超参数 ################################### - self.algo_name = "PPO" # 算法名称 - self.env_name = 'CartPole-v0' # 环境名称 - self.continuous = False # 环境是否为连续动作 - self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # 检测GPU - self.seed = 10 # 随机种子,置0则不设置随机种子 - self.train_eps = 200 # 训练的回合数 - self.test_eps = 20 # 测试的回合数 - ################################################################################ - - ################################## 算法超参数 #################################### - self.batch_size = 5 # mini-batch SGD中的批量大小 - self.gamma = 0.95 # 强化学习中的折扣因子 - self.n_epochs = 4 - self.actor_lr = 0.0003 # actor的学习率 - self.critic_lr = 0.0003 # critic的学习率 - self.gae_lambda = 0.95 - self.policy_clip = 0.2 - self.hidden_dim = 256 - self.update_fre = 20 # 策略更新频率 - ################################################################################ - - ################################# 保存结果相关参数 ################################ - self.result_path = curr_path+"/outputs/" + self.env_name + \ - '/'+curr_time+'/results/' # 保存结果的路径 - self.model_path = curr_path+"/outputs/" + self.env_name + \ - '/'+curr_time+'/models/' # 保存模型的路径 - self.save = True # 是否保存图片 - ################################################################################ - -def env_agent_config(cfg): - ''' 创建环境和智能体 - ''' - env = gym.make(cfg.env_name) # 创建环境 - n_states = env.observation_space.shape[0] # 状态维度 - if cfg.continuous: - n_actions = 
env.action_space.shape[0] # 动作维度 - else: - n_actions = env.action_space.n # 动作维度 - agent = PPO(n_states, n_actions, cfg) # 创建智能体 - if cfg.seed !=0: # 设置随机种子 - torch.manual_seed(cfg.seed) - env.seed(cfg.seed) - np.random.seed(cfg.seed) - return env, agent - -def train(cfg,env,agent): - print('开始训练!') - print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}') - rewards = [] # 记录所有回合的奖励 - ma_rewards = [] # 记录所有回合的滑动平均奖励 - steps = 0 - for i_ep in range(cfg.train_eps): - state = env.reset() - done = False - ep_reward = 0 - while not done: - action, prob, val = agent.choose_action(state) - state_, reward, done, _ = env.step(action) - steps += 1 - ep_reward += reward - agent.memory.push(state, action, prob, val, reward, done) - if steps % cfg.update_fre == 0: - agent.update() - state = state_ - rewards.append(ep_reward) - if ma_rewards: - ma_rewards.append(0.9*ma_rewards[-1]+0.1*ep_reward) - else: - ma_rewards.append(ep_reward) - if (i_ep+1)%10 == 0: - print(f"回合:{i_ep+1}/{cfg.train_eps},奖励:{ep_reward:.2f}") - print('完成训练!') - return rewards,ma_rewards - -def test(cfg,env,agent): - print('开始测试!') - print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}') - rewards = [] # 记录所有回合的奖励 - ma_rewards = [] # 记录所有回合的滑动平均奖励 - for i_ep in range(cfg.test_eps): - state = env.reset() - done = False - ep_reward = 0 - while not done: - action, prob, val = agent.choose_action(state) - state_, reward, done, _ = env.step(action) - ep_reward += reward - state = state_ - rewards.append(ep_reward) - if ma_rewards: - ma_rewards.append( - 0.9*ma_rewards[-1]+0.1*ep_reward) - else: - ma_rewards.append(ep_reward) - print('回合:{}/{}, 奖励:{}'.format(i_ep+1, cfg.test_eps, ep_reward)) - print('完成训练!') - return rewards,ma_rewards - -if __name__ == "__main__": - cfg = Config() - # 训练 - env,agent = env_agent_config(cfg) - rewards, ma_rewards = train(cfg, env, agent) - make_dir(cfg.result_path, cfg.model_path) # 创建保存结果和模型路径的文件夹 - agent.save(path=cfg.model_path) - save_results(rewards, 
ma_rewards, tag='train', path=cfg.result_path) - plot_rewards(rewards, ma_rewards, cfg, tag="train") - # 测试 - env,agent = env_agent_config(cfg) - agent.load(path=cfg.model_path) - rewards,ma_rewards = test(cfg,env,agent) - save_results(rewards,ma_rewards,tag='test',path=cfg.result_path) - plot_rewards(rewards,ma_rewards,cfg,tag="test") \ No newline at end of file diff --git a/projects/.gitignore b/projects/.gitignore new file mode 100644 index 0000000..764cbb7 --- /dev/null +++ b/projects/.gitignore @@ -0,0 +1,5 @@ +.DS_Store +.ipynb_checkpoints +__pycache__ +.vscode +test.py \ No newline at end of file diff --git a/projects/LICENSE b/projects/LICENSE new file mode 100644 index 0000000..673d927 --- /dev/null +++ b/projects/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 John Jim + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/projects/README.md b/projects/README.md new file mode 100644 index 0000000..ee63dd4 --- /dev/null +++ b/projects/README.md @@ -0,0 +1,49 @@ +## 0、写在前面 + +本项目用于学习RL基础算法,尽量做到: **注释详细**(经过很长时间的纠结,还是中文注释好了!!!),**结构清晰**。 + +代码结构主要分为以下几个脚本: + +* ```[algorithm_name].py```:即保存算法的脚本,例如```dqn.py```,每种算法都会有一定的基础模块,例如```Replay Buffer```、```MLP```(多层感知机)等等; +* ```task.py```: 即保存任务的脚本,基本包括基于```argparse```模块的参数,训练以及测试函数等等; +* ```utils.py```:该脚本用于保存诸如存储结果以及画图的软件,在实际项目或研究中,推荐大家使用```Tensorboard```来保存结果,然后使用诸如```matplotlib```以及```seaborn```来进一步画图。 + +## 运行环境 + +python 3.7、pytorch 1.6.0-1.9.0、gym 0.21.0 + +或者在```README.md```目录下执行以下命令复现环境: +```bash +conda env create -f environment.yaml +``` +## 使用说明 + +直接运行带有```train```的py文件或ipynb文件会进行训练默认的任务; +也可以运行带有```task```的py文件训练不同的任务 + +## 内容导航 + +| 算法名称 | 相关论文材料 | 环境 | 备注 | +| :--------------------------------------: | :----------------------------------------------------------: | ----------------------------------------- | :--------------------------------: | +| [On-Policy First-Visit MC](./MonteCarlo) | [medium blog](https://medium.com/analytics-vidhya/monte-carlo-methods-in-reinforcement-learning-part-1-on-policy-methods-1f004d59686a) | [Racetrack](./envs/racetrack_env.md) | | +| [Q-Learning](./QLearning) | [towardsdatascience blog](https://towardsdatascience.com/simple-reinforcement-learning-q-learning-fcddc4b6fe56),[q learning paper](https://ieeexplore.ieee.org/document/8836506) | [CliffWalking-v0](./envs/gym_info.md) | | +| [Sarsa](./Sarsa) | [geeksforgeeks blog](https://www.geeksforgeeks.org/sarsa-reinforcement-learning/) | [Racetrack](./envs/racetrack_env.md) | | +| [DQN](./DQN) | [DQN Paper](https://www.cs.toronto.edu/~vmnih/docs/dqn.pdf),[Nature DQN Paper](https://www.nature.com/articles/nature14236) | [CartPole-v0](./envs/gym_info.md) | | +| [DQN-cnn](./DQN_cnn) | [DQN Paper](https://www.cs.toronto.edu/~vmnih/docs/dqn.pdf) | [CartPole-v0](./envs/gym_info.md) | 与DQN相比使用了CNN而不是全连接网络 | +| [DoubleDQN](./DoubleDQN) | 
[DoubleDQN Paper](https://arxiv.org/abs/1509.06461) | [CartPole-v0](./envs/gym_info.md) | | +| [Hierarchical DQN](HierarchicalDQN) | [H-DQN Paper](https://arxiv.org/abs/1604.06057) | [CartPole-v0](./envs/gym_info.md) | | +| [PolicyGradient](./PolicyGradient) | [Lil'log](https://lilianweng.github.io/lil-log/2018/04/08/policy-gradient-algorithms.html) | [CartPole-v0](./envs/gym_info.md) | | +| [A2C](./A2C) | [A3C Paper](https://arxiv.org/abs/1602.01783) | [CartPole-v0](./envs/gym_info.md) | | +| [SAC](./SoftActorCritic) | [SAC Paper](https://arxiv.org/abs/1801.01290) | [Pendulum-v0](./envs/gym_info.md) | | +| [PPO](./PPO) | [PPO paper](https://arxiv.org/abs/1707.06347) | [CartPole-v0](./envs/gym_info.md) | | +| [DDPG](./DDPG) | [DDPG Paper](https://arxiv.org/abs/1509.02971) | [Pendulum-v0](./envs/gym_info.md) | | +| [TD3](./TD3) | [TD3 Paper](https://arxiv.org/abs/1802.09477) | [HalfCheetah-v2](./envs/mujoco_info.md) | | + + +## Refs + +[RL-Adventure-2](https://github.com/higgsfield/RL-Adventure-2) + +[RL-Adventure](https://github.com/higgsfield/RL-Adventure) + +[Google 开源项目风格指南——中文版](https://zh-google-styleguide.readthedocs.io/en/latest/google-python-styleguide/python_style_rules/#comments) \ No newline at end of file diff --git a/codes/A2C/README.md b/projects/codes/A2C/README.md similarity index 100% rename from codes/A2C/README.md rename to projects/codes/A2C/README.md diff --git a/codes/A2C/a2c.py b/projects/codes/A2C/a2c.py similarity index 100% rename from codes/A2C/a2c.py rename to projects/codes/A2C/a2c.py diff --git a/codes/A2C/outputs/CartPole-v0/20220713-221850/results/params.json b/projects/codes/A2C/outputs/CartPole-v0/20220713-221850/results/params.json similarity index 100% rename from codes/A2C/outputs/CartPole-v0/20220713-221850/results/params.json rename to projects/codes/A2C/outputs/CartPole-v0/20220713-221850/results/params.json diff --git a/codes/A2C/outputs/CartPole-v0/20220713-221850/results/train_ma_rewards.npy 
b/projects/codes/A2C/outputs/CartPole-v0/20220713-221850/results/train_ma_rewards.npy similarity index 100% rename from codes/A2C/outputs/CartPole-v0/20220713-221850/results/train_ma_rewards.npy rename to projects/codes/A2C/outputs/CartPole-v0/20220713-221850/results/train_ma_rewards.npy diff --git a/codes/A2C/outputs/CartPole-v0/20220713-221850/results/train_rewards.npy b/projects/codes/A2C/outputs/CartPole-v0/20220713-221850/results/train_rewards.npy similarity index 100% rename from codes/A2C/outputs/CartPole-v0/20220713-221850/results/train_rewards.npy rename to projects/codes/A2C/outputs/CartPole-v0/20220713-221850/results/train_rewards.npy diff --git a/codes/A2C/outputs/CartPole-v0/20220713-221850/results/train_rewards_curve.png b/projects/codes/A2C/outputs/CartPole-v0/20220713-221850/results/train_rewards_curve.png similarity index 100% rename from codes/A2C/outputs/CartPole-v0/20220713-221850/results/train_rewards_curve.png rename to projects/codes/A2C/outputs/CartPole-v0/20220713-221850/results/train_rewards_curve.png diff --git a/codes/A2C/task0.py b/projects/codes/A2C/task0.py similarity index 100% rename from codes/A2C/task0.py rename to projects/codes/A2C/task0.py diff --git a/codes/DDPG/ddpg.py b/projects/codes/DDPG/ddpg.py similarity index 100% rename from codes/DDPG/ddpg.py rename to projects/codes/DDPG/ddpg.py diff --git a/codes/DDPG/env.py b/projects/codes/DDPG/env.py similarity index 100% rename from codes/DDPG/env.py rename to projects/codes/DDPG/env.py diff --git a/codes/DDPG/outputs/Pendulum-v1/20220713-225402/models/checkpoint.pt b/projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/models/checkpoint.pt similarity index 100% rename from codes/DDPG/outputs/Pendulum-v1/20220713-225402/models/checkpoint.pt rename to projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/models/checkpoint.pt diff --git a/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/params.json 
b/projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/params.json similarity index 100% rename from codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/params.json rename to projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/params.json diff --git a/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/test_ma_rewards.npy b/projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/test_ma_rewards.npy similarity index 100% rename from codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/test_ma_rewards.npy rename to projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/test_ma_rewards.npy diff --git a/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/test_rewards.npy b/projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/test_rewards.npy similarity index 100% rename from codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/test_rewards.npy rename to projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/test_rewards.npy diff --git a/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/test_rewards_curve.png b/projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/test_rewards_curve.png similarity index 100% rename from codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/test_rewards_curve.png rename to projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/test_rewards_curve.png diff --git a/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/train_ma_rewards.npy b/projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/train_ma_rewards.npy similarity index 100% rename from codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/train_ma_rewards.npy rename to projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/train_ma_rewards.npy diff --git a/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/train_rewards.npy b/projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/train_rewards.npy similarity index 100% rename from 
codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/train_rewards.npy rename to projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/train_rewards.npy diff --git a/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/train_rewards_curve.png b/projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/train_rewards_curve.png similarity index 100% rename from codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/train_rewards_curve.png rename to projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/train_rewards_curve.png diff --git a/codes/DDPG/task0.py b/projects/codes/DDPG/task0.py similarity index 100% rename from codes/DDPG/task0.py rename to projects/codes/DDPG/task0.py diff --git a/codes/DQN/README.md b/projects/codes/DQN/README.md similarity index 100% rename from codes/DQN/README.md rename to projects/codes/DQN/README.md diff --git a/codes/DQN/assets/eval_rewards_curve.png b/projects/codes/DQN/assets/eval_rewards_curve.png similarity index 100% rename from codes/DQN/assets/eval_rewards_curve.png rename to projects/codes/DQN/assets/eval_rewards_curve.png diff --git a/codes/DQN/assets/image-20210507162813393.png b/projects/codes/DQN/assets/image-20210507162813393.png similarity index 100% rename from codes/DQN/assets/image-20210507162813393.png rename to projects/codes/DQN/assets/image-20210507162813393.png diff --git a/codes/DQN/assets/rewards_curve_train.png b/projects/codes/DQN/assets/rewards_curve_train.png similarity index 100% rename from codes/DQN/assets/rewards_curve_train.png rename to projects/codes/DQN/assets/rewards_curve_train.png diff --git a/codes/DQN/assets/train_rewards_curve.png b/projects/codes/DQN/assets/train_rewards_curve.png similarity index 100% rename from codes/DQN/assets/train_rewards_curve.png rename to projects/codes/DQN/assets/train_rewards_curve.png diff --git a/codes/DQN/assets/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0pvaG5KaW0w,size_16,color_FFFFFF,t_70.png 
b/projects/codes/DQN/assets/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0pvaG5KaW0w,size_16,color_FFFFFF,t_70.png similarity index 100% rename from codes/DQN/assets/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0pvaG5KaW0w,size_16,color_FFFFFF,t_70.png rename to projects/codes/DQN/assets/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0pvaG5KaW0w,size_16,color_FFFFFF,t_70.png diff --git a/codes/DQN/dqn.py b/projects/codes/DQN/dqn.py similarity index 100% rename from codes/DQN/dqn.py rename to projects/codes/DQN/dqn.py diff --git a/codes/DQN/dqn_cnn.py b/projects/codes/DQN/dqn_cnn.py similarity index 100% rename from codes/DQN/dqn_cnn.py rename to projects/codes/DQN/dqn_cnn.py diff --git a/codes/DQN/dqn_cnn2.py b/projects/codes/DQN/dqn_cnn2.py similarity index 100% rename from codes/DQN/dqn_cnn2.py rename to projects/codes/DQN/dqn_cnn2.py diff --git a/codes/DQN/outputs/CartPole-v0/20220713-211653/models/dqn_checkpoint.pth b/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/models/dqn_checkpoint.pth similarity index 100% rename from codes/DQN/outputs/CartPole-v0/20220713-211653/models/dqn_checkpoint.pth rename to projects/codes/DQN/outputs/CartPole-v0/20220713-211653/models/dqn_checkpoint.pth diff --git a/codes/DQN/outputs/CartPole-v0/20220713-211653/results/params.json b/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/params.json similarity index 100% rename from codes/DQN/outputs/CartPole-v0/20220713-211653/results/params.json rename to projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/params.json diff --git a/codes/DQN/outputs/CartPole-v0/20220713-211653/results/test_ma_rewards.npy b/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/test_ma_rewards.npy similarity index 100% rename from codes/DQN/outputs/CartPole-v0/20220713-211653/results/test_ma_rewards.npy rename to 
projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/test_ma_rewards.npy diff --git a/codes/DQN/outputs/CartPole-v0/20220713-211653/results/test_rewards.npy b/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/test_rewards.npy similarity index 100% rename from codes/DQN/outputs/CartPole-v0/20220713-211653/results/test_rewards.npy rename to projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/test_rewards.npy diff --git a/codes/DQN/outputs/CartPole-v0/20220713-211653/results/test_rewards_curve.png b/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/test_rewards_curve.png similarity index 100% rename from codes/DQN/outputs/CartPole-v0/20220713-211653/results/test_rewards_curve.png rename to projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/test_rewards_curve.png diff --git a/codes/DQN/outputs/CartPole-v0/20220713-211653/results/test_steps.npy b/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/test_steps.npy similarity index 100% rename from codes/DQN/outputs/CartPole-v0/20220713-211653/results/test_steps.npy rename to projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/test_steps.npy diff --git a/codes/DQN/outputs/CartPole-v0/20220713-211653/results/train_ma_rewards.npy b/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/train_ma_rewards.npy similarity index 100% rename from codes/DQN/outputs/CartPole-v0/20220713-211653/results/train_ma_rewards.npy rename to projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/train_ma_rewards.npy diff --git a/codes/DQN/outputs/CartPole-v0/20220713-211653/results/train_rewards.npy b/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/train_rewards.npy similarity index 100% rename from codes/DQN/outputs/CartPole-v0/20220713-211653/results/train_rewards.npy rename to projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/train_rewards.npy diff --git 
a/codes/DQN/outputs/CartPole-v0/20220713-211653/results/train_rewards_curve.png b/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/train_rewards_curve.png similarity index 100% rename from codes/DQN/outputs/CartPole-v0/20220713-211653/results/train_rewards_curve.png rename to projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/train_rewards_curve.png diff --git a/codes/DQN/outputs/CartPole-v0/20220713-211653/results/train_steps.npy b/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/train_steps.npy similarity index 100% rename from codes/DQN/outputs/CartPole-v0/20220713-211653/results/train_steps.npy rename to projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/train_steps.npy diff --git a/codes/DQN/task0.py b/projects/codes/DQN/task0.py similarity index 100% rename from codes/DQN/task0.py rename to projects/codes/DQN/task0.py diff --git a/codes/DoubleDQN/README.md b/projects/codes/DoubleDQN/README.md similarity index 100% rename from codes/DoubleDQN/README.md rename to projects/codes/DoubleDQN/README.md diff --git a/codes/DoubleDQN/assets/20201222145725907.png b/projects/codes/DoubleDQN/assets/20201222145725907.png similarity index 100% rename from codes/DoubleDQN/assets/20201222145725907.png rename to projects/codes/DoubleDQN/assets/20201222145725907.png diff --git a/codes/DoubleDQN/assets/20201222150225327.png b/projects/codes/DoubleDQN/assets/20201222150225327.png similarity index 100% rename from codes/DoubleDQN/assets/20201222150225327.png rename to projects/codes/DoubleDQN/assets/20201222150225327.png diff --git a/codes/DoubleDQN/assets/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0pvaG5KaW0w,size_16,color_FFFFFF,t_70-20210328110837128.png b/projects/codes/DoubleDQN/assets/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0pvaG5KaW0w,size_16,color_FFFFFF,t_70-20210328110837128.png similarity index 100% rename from 
codes/DoubleDQN/assets/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0pvaG5KaW0w,size_16,color_FFFFFF,t_70-20210328110837128.png rename to projects/codes/DoubleDQN/assets/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0pvaG5KaW0w,size_16,color_FFFFFF,t_70-20210328110837128.png diff --git a/codes/DoubleDQN/assets/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0pvaG5KaW0w,size_16,color_FFFFFF,t_70-20210328110837146.png b/projects/codes/DoubleDQN/assets/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0pvaG5KaW0w,size_16,color_FFFFFF,t_70-20210328110837146.png similarity index 100% rename from codes/DoubleDQN/assets/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0pvaG5KaW0w,size_16,color_FFFFFF,t_70-20210328110837146.png rename to projects/codes/DoubleDQN/assets/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0pvaG5KaW0w,size_16,color_FFFFFF,t_70-20210328110837146.png diff --git a/codes/DoubleDQN/assets/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0pvaG5KaW0w,size_16,color_FFFFFF,t_70-20210328110837157.png b/projects/codes/DoubleDQN/assets/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0pvaG5KaW0w,size_16,color_FFFFFF,t_70-20210328110837157.png similarity index 100% rename from codes/DoubleDQN/assets/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0pvaG5KaW0w,size_16,color_FFFFFF,t_70-20210328110837157.png rename to projects/codes/DoubleDQN/assets/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0pvaG5KaW0w,size_16,color_FFFFFF,t_70-20210328110837157.png diff --git a/codes/DoubleDQN/assets/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0pvaG5KaW0w,size_16,color_FFFFFF,t_70.png 
b/projects/codes/DoubleDQN/assets/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0pvaG5KaW0w,size_16,color_FFFFFF,t_70.png similarity index 100% rename from codes/DoubleDQN/assets/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0pvaG5KaW0w,size_16,color_FFFFFF,t_70.png rename to projects/codes/DoubleDQN/assets/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0pvaG5KaW0w,size_16,color_FFFFFF,t_70.png diff --git a/codes/DoubleDQN/double_dqn.py b/projects/codes/DoubleDQN/double_dqn.py similarity index 100% rename from codes/DoubleDQN/double_dqn.py rename to projects/codes/DoubleDQN/double_dqn.py diff --git a/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/models/checkpoint.pth b/projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/models/checkpoint.pth similarity index 100% rename from codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/models/checkpoint.pth rename to projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/models/checkpoint.pth diff --git a/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/params.json b/projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/params.json similarity index 100% rename from codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/params.json rename to projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/params.json diff --git a/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/test_ma_rewards.npy b/projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/test_ma_rewards.npy similarity index 100% rename from codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/test_ma_rewards.npy rename to projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/test_ma_rewards.npy diff --git a/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/test_rewards.npy 
b/projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/test_rewards.npy similarity index 100% rename from codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/test_rewards.npy rename to projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/test_rewards.npy diff --git a/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/test_rewards_curve.png b/projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/test_rewards_curve.png similarity index 100% rename from codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/test_rewards_curve.png rename to projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/test_rewards_curve.png diff --git a/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/train_ma_rewards.npy b/projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/train_ma_rewards.npy similarity index 100% rename from codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/train_ma_rewards.npy rename to projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/train_ma_rewards.npy diff --git a/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/train_rewards.npy b/projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/train_rewards.npy similarity index 100% rename from codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/train_rewards.npy rename to projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/train_rewards.npy diff --git a/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/train_rewards_curve.png b/projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/train_rewards_curve.png similarity index 100% rename from codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/train_rewards_curve.png rename to projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/train_rewards_curve.png diff --git a/codes/DoubleDQN/task0.py b/projects/codes/DoubleDQN/task0.py similarity index 
100% rename from codes/DoubleDQN/task0.py rename to projects/codes/DoubleDQN/task0.py diff --git a/codes/DuelingDQN/assets/task0_train_20211112021954.png b/projects/codes/DuelingDQN/assets/task0_train_20211112021954.png similarity index 100% rename from codes/DuelingDQN/assets/task0_train_20211112021954.png rename to projects/codes/DuelingDQN/assets/task0_train_20211112021954.png diff --git a/codes/DuelingDQN/task0_train.ipynb b/projects/codes/DuelingDQN/task0_train.ipynb similarity index 100% rename from codes/DuelingDQN/task0_train.ipynb rename to projects/codes/DuelingDQN/task0_train.ipynb diff --git a/codes/GAE/task0_train.py b/projects/codes/GAE/task0_train.py similarity index 100% rename from codes/GAE/task0_train.py rename to projects/codes/GAE/task0_train.py diff --git a/codes/HierarchicalDQN/README.md b/projects/codes/HierarchicalDQN/README.md similarity index 100% rename from codes/HierarchicalDQN/README.md rename to projects/codes/HierarchicalDQN/README.md diff --git a/codes/HierarchicalDQN/agent.py b/projects/codes/HierarchicalDQN/agent.py similarity index 100% rename from codes/HierarchicalDQN/agent.py rename to projects/codes/HierarchicalDQN/agent.py diff --git a/codes/HierarchicalDQN/assets/image-20210331153115575.png b/projects/codes/HierarchicalDQN/assets/image-20210331153115575.png similarity index 100% rename from codes/HierarchicalDQN/assets/image-20210331153115575.png rename to projects/codes/HierarchicalDQN/assets/image-20210331153115575.png diff --git a/codes/HierarchicalDQN/assets/image-20210331153542314.png b/projects/codes/HierarchicalDQN/assets/image-20210331153542314.png similarity index 100% rename from codes/HierarchicalDQN/assets/image-20210331153542314.png rename to projects/codes/HierarchicalDQN/assets/image-20210331153542314.png diff --git a/codes/HierarchicalDQN/outputs/CartPole-v0/20211221-200119/models/meta_checkpoint.pth b/projects/codes/HierarchicalDQN/outputs/CartPole-v0/20211221-200119/models/meta_checkpoint.pth similarity 
index 100% rename from codes/HierarchicalDQN/outputs/CartPole-v0/20211221-200119/models/meta_checkpoint.pth rename to projects/codes/HierarchicalDQN/outputs/CartPole-v0/20211221-200119/models/meta_checkpoint.pth diff --git a/codes/HierarchicalDQN/outputs/CartPole-v0/20211221-200119/models/policy_checkpoint.pth b/projects/codes/HierarchicalDQN/outputs/CartPole-v0/20211221-200119/models/policy_checkpoint.pth similarity index 100% rename from codes/HierarchicalDQN/outputs/CartPole-v0/20211221-200119/models/policy_checkpoint.pth rename to projects/codes/HierarchicalDQN/outputs/CartPole-v0/20211221-200119/models/policy_checkpoint.pth diff --git a/codes/HierarchicalDQN/outputs/CartPole-v0/20211221-200119/results/test_ma_rewards.npy b/projects/codes/HierarchicalDQN/outputs/CartPole-v0/20211221-200119/results/test_ma_rewards.npy similarity index 100% rename from codes/HierarchicalDQN/outputs/CartPole-v0/20211221-200119/results/test_ma_rewards.npy rename to projects/codes/HierarchicalDQN/outputs/CartPole-v0/20211221-200119/results/test_ma_rewards.npy diff --git a/codes/HierarchicalDQN/outputs/CartPole-v0/20211221-200119/results/test_rewards.npy b/projects/codes/HierarchicalDQN/outputs/CartPole-v0/20211221-200119/results/test_rewards.npy similarity index 100% rename from codes/HierarchicalDQN/outputs/CartPole-v0/20211221-200119/results/test_rewards.npy rename to projects/codes/HierarchicalDQN/outputs/CartPole-v0/20211221-200119/results/test_rewards.npy diff --git a/codes/HierarchicalDQN/outputs/CartPole-v0/20211221-200119/results/test_rewards_curve.png b/projects/codes/HierarchicalDQN/outputs/CartPole-v0/20211221-200119/results/test_rewards_curve.png similarity index 100% rename from codes/HierarchicalDQN/outputs/CartPole-v0/20211221-200119/results/test_rewards_curve.png rename to projects/codes/HierarchicalDQN/outputs/CartPole-v0/20211221-200119/results/test_rewards_curve.png diff --git a/codes/HierarchicalDQN/outputs/CartPole-v0/20211221-200119/results/train_ma_rewards.npy 
b/projects/codes/HierarchicalDQN/outputs/CartPole-v0/20211221-200119/results/train_ma_rewards.npy similarity index 100% rename from codes/HierarchicalDQN/outputs/CartPole-v0/20211221-200119/results/train_ma_rewards.npy rename to projects/codes/HierarchicalDQN/outputs/CartPole-v0/20211221-200119/results/train_ma_rewards.npy diff --git a/codes/HierarchicalDQN/outputs/CartPole-v0/20211221-200119/results/train_rewards.npy b/projects/codes/HierarchicalDQN/outputs/CartPole-v0/20211221-200119/results/train_rewards.npy similarity index 100% rename from codes/HierarchicalDQN/outputs/CartPole-v0/20211221-200119/results/train_rewards.npy rename to projects/codes/HierarchicalDQN/outputs/CartPole-v0/20211221-200119/results/train_rewards.npy diff --git a/codes/HierarchicalDQN/outputs/CartPole-v0/20211221-200119/results/train_rewards_curve.png b/projects/codes/HierarchicalDQN/outputs/CartPole-v0/20211221-200119/results/train_rewards_curve.png similarity index 100% rename from codes/HierarchicalDQN/outputs/CartPole-v0/20211221-200119/results/train_rewards_curve.png rename to projects/codes/HierarchicalDQN/outputs/CartPole-v0/20211221-200119/results/train_rewards_curve.png diff --git a/codes/HierarchicalDQN/task0.py b/projects/codes/HierarchicalDQN/task0.py similarity index 100% rename from codes/HierarchicalDQN/task0.py rename to projects/codes/HierarchicalDQN/task0.py diff --git a/codes/HierarchicalDQN/train.py b/projects/codes/HierarchicalDQN/train.py similarity index 100% rename from codes/HierarchicalDQN/train.py rename to projects/codes/HierarchicalDQN/train.py diff --git a/codes/MonteCarlo/README.md b/projects/codes/MonteCarlo/README.md similarity index 100% rename from codes/MonteCarlo/README.md rename to projects/codes/MonteCarlo/README.md diff --git a/codes/MonteCarlo/agent.py b/projects/codes/MonteCarlo/agent.py similarity index 100% rename from codes/MonteCarlo/agent.py rename to projects/codes/MonteCarlo/agent.py diff --git a/codes/MonteCarlo/assets/mc_control_algo.png 
b/projects/codes/MonteCarlo/assets/mc_control_algo.png similarity index 100% rename from codes/MonteCarlo/assets/mc_control_algo.png rename to projects/codes/MonteCarlo/assets/mc_control_algo.png diff --git a/codes/MonteCarlo/outputs/Racetrack/20210505-165945/models/Q_table b/projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/models/Q_table similarity index 100% rename from codes/MonteCarlo/outputs/Racetrack/20210505-165945/models/Q_table rename to projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/models/Q_table diff --git a/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/eval_ma_rewards.npy b/projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/eval_ma_rewards.npy similarity index 100% rename from codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/eval_ma_rewards.npy rename to projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/eval_ma_rewards.npy diff --git a/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/eval_rewards.npy b/projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/eval_rewards.npy similarity index 100% rename from codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/eval_rewards.npy rename to projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/eval_rewards.npy diff --git a/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/eval_rewards_curve.png b/projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/eval_rewards_curve.png similarity index 100% rename from codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/eval_rewards_curve.png rename to projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/eval_rewards_curve.png diff --git a/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/train_ma_rewards.npy b/projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/train_ma_rewards.npy similarity index 100% rename from 
codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/train_ma_rewards.npy rename to projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/train_ma_rewards.npy diff --git a/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/train_rewards.npy b/projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/train_rewards.npy similarity index 100% rename from codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/train_rewards.npy rename to projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/train_rewards.npy diff --git a/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/train_rewards_curve.png b/projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/train_rewards_curve.png similarity index 100% rename from codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/train_rewards_curve.png rename to projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/train_rewards_curve.png diff --git a/codes/MonteCarlo/task0_train.py b/projects/codes/MonteCarlo/task0_train.py similarity index 100% rename from codes/MonteCarlo/task0_train.py rename to projects/codes/MonteCarlo/task0_train.py diff --git a/codes/NoisyDQN/noisy_dqn.py b/projects/codes/NoisyDQN/noisy_dqn.py similarity index 100% rename from codes/NoisyDQN/noisy_dqn.py rename to projects/codes/NoisyDQN/noisy_dqn.py diff --git a/codes/NoisyDQN/task0_train.ipynb b/projects/codes/NoisyDQN/task0_train.ipynb similarity index 100% rename from codes/NoisyDQN/task0_train.ipynb rename to projects/codes/NoisyDQN/task0_train.ipynb diff --git a/codes/PPO/README.md b/projects/codes/PPO/README.md similarity index 100% rename from codes/PPO/README.md rename to projects/codes/PPO/README.md diff --git a/codes/PPO/assets/20210323154236878.png b/projects/codes/PPO/assets/20210323154236878.png similarity index 100% rename from codes/PPO/assets/20210323154236878.png rename to projects/codes/PPO/assets/20210323154236878.png diff --git 
a/codes/PPO/assets/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0pvaG5KaW0w,size_16,color_FFFFFF,t_70-20210405110725113.png b/projects/codes/PPO/assets/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0pvaG5KaW0w,size_16,color_FFFFFF,t_70-20210405110725113.png similarity index 100% rename from codes/PPO/assets/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0pvaG5KaW0w,size_16,color_FFFFFF,t_70-20210405110725113.png rename to projects/codes/PPO/assets/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0pvaG5KaW0w,size_16,color_FFFFFF,t_70-20210405110725113.png diff --git a/codes/PPO/assets/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0pvaG5KaW0w,size_16,color_FFFFFF,t_70.png b/projects/codes/PPO/assets/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0pvaG5KaW0w,size_16,color_FFFFFF,t_70.png similarity index 100% rename from codes/PPO/assets/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0pvaG5KaW0w,size_16,color_FFFFFF,t_70.png rename to projects/codes/PPO/assets/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0pvaG5KaW0w,size_16,color_FFFFFF,t_70.png diff --git a/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/models/ppo_actor.pt b/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/models/ppo_actor.pt new file mode 100644 index 0000000..5419b72 Binary files /dev/null and b/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/models/ppo_actor.pt differ diff --git a/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/models/ppo_critic.pt b/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/models/ppo_critic.pt new file mode 100644 index 0000000..af97c9b Binary files /dev/null and b/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/models/ppo_critic.pt differ diff --git 
a/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/params.json b/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/params.json new file mode 100644 index 0000000..748044c --- /dev/null +++ b/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/params.json @@ -0,0 +1,20 @@ +{ + "algo_name": "PPO", + "env_name": "CartPole-v0", + "continuous": false, + "train_eps": 200, + "test_eps": 20, + "gamma": 0.99, + "batch_size": 5, + "n_epochs": 4, + "actor_lr": 0.0003, + "critic_lr": 0.0003, + "gae_lambda": 0.95, + "policy_clip": 0.2, + "update_fre": 20, + "hidden_dim": 256, + "device": "cpu", + "result_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\PPO/outputs/CartPole-v0/20220731-233512/results/", + "model_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\PPO/outputs/CartPole-v0/20220731-233512/models/", + "save_fig": true +} \ No newline at end of file diff --git a/codes/PPO/outputs/CartPole-v0/20211231-193837/results/test_ma_rewards.npy b/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/test_ma_rewards.npy similarity index 100% rename from codes/PPO/outputs/CartPole-v0/20211231-193837/results/test_ma_rewards.npy rename to projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/test_ma_rewards.npy diff --git a/codes/PPO/outputs/CartPole-v0/20211231-193837/results/test_rewards.npy b/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/test_rewards.npy similarity index 100% rename from codes/PPO/outputs/CartPole-v0/20211231-193837/results/test_rewards.npy rename to projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/test_rewards.npy diff --git a/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/test_rewards_curve.png b/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/test_rewards_curve.png new file mode 100644 index 0000000..b52cc37 Binary files /dev/null and b/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/test_rewards_curve.png differ 
diff --git a/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/train_ma_rewards.npy b/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/train_ma_rewards.npy new file mode 100644 index 0000000..11249ed Binary files /dev/null and b/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/train_ma_rewards.npy differ diff --git a/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/train_rewards.npy b/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/train_rewards.npy new file mode 100644 index 0000000..078b31f Binary files /dev/null and b/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/train_rewards.npy differ diff --git a/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/train_rewards_curve.png b/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/train_rewards_curve.png new file mode 100644 index 0000000..7a4f892 Binary files /dev/null and b/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/train_rewards_curve.png differ diff --git a/codes/PPO/ppo2.py b/projects/codes/PPO/ppo2.py similarity index 100% rename from codes/PPO/ppo2.py rename to projects/codes/PPO/ppo2.py diff --git a/projects/codes/PPO/task0.py b/projects/codes/PPO/task0.py new file mode 100644 index 0000000..9cd5063 --- /dev/null +++ b/projects/codes/PPO/task0.py @@ -0,0 +1,132 @@ +import sys,os +curr_path = os.path.dirname(os.path.abspath(__file__)) # 当前文件所在绝对路径 +parent_path = os.path.dirname(curr_path) # 父路径 +sys.path.append(parent_path) # 添加路径到系统路径 + +import gym +import torch +import numpy as np +import datetime +import argparse +from common.utils import plot_rewards,save_args,save_results,make_dir +from ppo2 import PPO + +def get_args(): + """ Hyperparameters + """ + curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # 获取当前时间 + parser = argparse.ArgumentParser(description="hyperparameters") + parser.add_argument('--algo_name',default='PPO',type=str,help="name of algorithm") + 
parser.add_argument('--env_name',default='CartPole-v0',type=str,help="name of environment") + parser.add_argument('--continuous',default=False,type=bool,help="if PPO is continous") # PPO既可适用于连续动作空间,也可以适用于离散动作空间 + parser.add_argument('--train_eps',default=200,type=int,help="episodes of training") + parser.add_argument('--test_eps',default=20,type=int,help="episodes of testing") + parser.add_argument('--gamma',default=0.99,type=float,help="discounted factor") + parser.add_argument('--batch_size',default=5,type=int) # mini-batch SGD中的批量大小 + parser.add_argument('--n_epochs',default=4,type=int) + parser.add_argument('--actor_lr',default=0.0003,type=float,help="learning rate of actor net") + parser.add_argument('--critic_lr',default=0.0003,type=float,help="learning rate of critic net") + parser.add_argument('--gae_lambda',default=0.95,type=float) + parser.add_argument('--policy_clip',default=0.2,type=float) # PPO-clip中的clip参数,一般是0.1~0.2左右 + parser.add_argument('--update_fre',default=20,type=int) + parser.add_argument('--hidden_dim',default=256,type=int) + parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda") + parser.add_argument('--result_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \ + '/' + curr_time + '/results/' ) + parser.add_argument('--model_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \ + '/' + curr_time + '/models/' ) # path to save models + parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not") + args = parser.parse_args() + return args + +def env_agent_config(cfg,seed = 1): + ''' 创建环境和智能体 + ''' + env = gym.make(cfg.env_name) # 创建环境 + n_states = env.observation_space.shape[0] # 状态维度 + if cfg.continuous: + n_actions = env.action_space.shape[0] # 动作维度 + else: + n_actions = env.action_space.n # 动作维度 + agent = PPO(n_states, n_actions, cfg) # 创建智能体 + if seed !=0: # 设置随机种子 + torch.manual_seed(seed) + env.seed(seed) + np.random.seed(seed) + return env, agent + +def 
train(cfg,env,agent): + print('开始训练!') + print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}') + rewards = [] # 记录所有回合的奖励 + ma_rewards = [] # 记录所有回合的滑动平均奖励 + steps = 0 + for i_ep in range(cfg.train_eps): + state = env.reset() + done = False + ep_reward = 0 + while not done: + action, prob, val = agent.choose_action(state) + state_, reward, done, _ = env.step(action) + steps += 1 + ep_reward += reward + agent.memory.push(state, action, prob, val, reward, done) + if steps % cfg.update_fre == 0: + agent.update() + state = state_ + rewards.append(ep_reward) + if ma_rewards: + ma_rewards.append(0.9*ma_rewards[-1]+0.1*ep_reward) + else: + ma_rewards.append(ep_reward) + if (i_ep+1)%10 == 0: + print(f"回合:{i_ep+1}/{cfg.train_eps},奖励:{ep_reward:.2f}") + print('完成训练!') + env.close() + res_dic = {'rewards':rewards,'ma_rewards':ma_rewards} + return res_dic + +def test(cfg,env,agent): + print('开始测试!') + print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}') + rewards = [] # 记录所有回合的奖励 + ma_rewards = [] # 记录所有回合的滑动平均奖励 + for i_ep in range(cfg.test_eps): + state = env.reset() + done = False + ep_reward = 0 + while not done: + action, prob, val = agent.choose_action(state) + state_, reward, done, _ = env.step(action) + ep_reward += reward + state = state_ + rewards.append(ep_reward) + if ma_rewards: + ma_rewards.append( + 0.9*ma_rewards[-1]+0.1*ep_reward) + else: + ma_rewards.append(ep_reward) + print('回合:{}/{}, 奖励:{}'.format(i_ep+1, cfg.test_eps, ep_reward)) + print('完成训练!') + env.close() + res_dic = {'rewards':rewards,'ma_rewards':ma_rewards} + return res_dic + +if __name__ == "__main__": + cfg = get_args() + # 训练 + env, agent = env_agent_config(cfg) + res_dic = train(cfg, env, agent) + make_dir(cfg.result_path, cfg.model_path) + save_args(cfg) # 保存参数 + agent.save(path=cfg.model_path) # save model + save_results(res_dic, tag='train', + path=cfg.result_path) + plot_rewards(res_dic['rewards'], res_dic['ma_rewards'], cfg, tag="train") + # 测试 + env, agent = 
env_agent_config(cfg) + agent.load(path=cfg.model_path) # 导入模型 + res_dic = test(cfg, env, agent) + save_results(res_dic, tag='test', + path=cfg.result_path) # 保存结果 + plot_rewards(res_dic['rewards'], res_dic['ma_rewards'],cfg, tag="test") # 画出结果 \ No newline at end of file diff --git a/codes/PPO/task1.py b/projects/codes/PPO/task1.py similarity index 100% rename from codes/PPO/task1.py rename to projects/codes/PPO/task1.py diff --git a/codes/PolicyGradient/README.md b/projects/codes/PolicyGradient/README.md similarity index 100% rename from codes/PolicyGradient/README.md rename to projects/codes/PolicyGradient/README.md diff --git a/codes/PolicyGradient/assets/image-20211016004808604.png b/projects/codes/PolicyGradient/assets/image-20211016004808604.png similarity index 100% rename from codes/PolicyGradient/assets/image-20211016004808604.png rename to projects/codes/PolicyGradient/assets/image-20211016004808604.png diff --git a/codes/PolicyGradient/assets/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0pvaG5KaW0w,size_16,color_FFFFFF,t_70-20210428001336032.png b/projects/codes/PolicyGradient/assets/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0pvaG5KaW0w,size_16,color_FFFFFF,t_70-20210428001336032.png similarity index 100% rename from codes/PolicyGradient/assets/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0pvaG5KaW0w,size_16,color_FFFFFF,t_70-20210428001336032.png rename to projects/codes/PolicyGradient/assets/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L0pvaG5KaW0w,size_16,color_FFFFFF,t_70-20210428001336032.png diff --git a/codes/PolicyGradient/outputs/CartPole-v0/20220210-061325/models/pg_checkpoint.pt b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220210-061325/models/pg_checkpoint.pt similarity index 100% rename from codes/PolicyGradient/outputs/CartPole-v0/20220210-061325/models/pg_checkpoint.pt rename to 
projects/codes/PolicyGradient/outputs/CartPole-v0/20220210-061325/models/pg_checkpoint.pt diff --git a/codes/PolicyGradient/outputs/CartPole-v0/20220210-061325/results/test_ma_rewards.npy b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220210-061325/results/test_ma_rewards.npy similarity index 100% rename from codes/PolicyGradient/outputs/CartPole-v0/20220210-061325/results/test_ma_rewards.npy rename to projects/codes/PolicyGradient/outputs/CartPole-v0/20220210-061325/results/test_ma_rewards.npy diff --git a/codes/PolicyGradient/outputs/CartPole-v0/20220210-061325/results/test_rewards.npy b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220210-061325/results/test_rewards.npy similarity index 100% rename from codes/PolicyGradient/outputs/CartPole-v0/20220210-061325/results/test_rewards.npy rename to projects/codes/PolicyGradient/outputs/CartPole-v0/20220210-061325/results/test_rewards.npy diff --git a/codes/PolicyGradient/outputs/CartPole-v0/20220210-061325/results/test_rewards_curve.png b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220210-061325/results/test_rewards_curve.png similarity index 100% rename from codes/PolicyGradient/outputs/CartPole-v0/20220210-061325/results/test_rewards_curve.png rename to projects/codes/PolicyGradient/outputs/CartPole-v0/20220210-061325/results/test_rewards_curve.png diff --git a/codes/PolicyGradient/outputs/CartPole-v0/20220210-061325/results/train_ma_rewards.npy b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220210-061325/results/train_ma_rewards.npy similarity index 100% rename from codes/PolicyGradient/outputs/CartPole-v0/20220210-061325/results/train_ma_rewards.npy rename to projects/codes/PolicyGradient/outputs/CartPole-v0/20220210-061325/results/train_ma_rewards.npy diff --git a/codes/PolicyGradient/outputs/CartPole-v0/20220210-061325/results/train_rewards.npy b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220210-061325/results/train_rewards.npy similarity index 100% rename from 
codes/PolicyGradient/outputs/CartPole-v0/20220210-061325/results/train_rewards.npy rename to projects/codes/PolicyGradient/outputs/CartPole-v0/20220210-061325/results/train_rewards.npy diff --git a/codes/PolicyGradient/outputs/CartPole-v0/20220210-061325/results/train_rewards_curve.png b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220210-061325/results/train_rewards_curve.png similarity index 100% rename from codes/PolicyGradient/outputs/CartPole-v0/20220210-061325/results/train_rewards_curve.png rename to projects/codes/PolicyGradient/outputs/CartPole-v0/20220210-061325/results/train_rewards_curve.png diff --git a/codes/PolicyGradient/pg.py b/projects/codes/PolicyGradient/pg.py similarity index 100% rename from codes/PolicyGradient/pg.py rename to projects/codes/PolicyGradient/pg.py diff --git a/codes/PolicyGradient/task0.py b/projects/codes/PolicyGradient/task0.py similarity index 100% rename from codes/PolicyGradient/task0.py rename to projects/codes/PolicyGradient/task0.py diff --git a/codes/QLearning/env/gridworld_env.py b/projects/codes/QLearning/env/gridworld_env.py similarity index 100% rename from codes/QLearning/env/gridworld_env.py rename to projects/codes/QLearning/env/gridworld_env.py diff --git a/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/models/Qleaning_model.pkl b/projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/models/Qleaning_model.pkl similarity index 100% rename from codes/QLearning/outputs/CliffWalking-v0/20220210-005501/models/Qleaning_model.pkl rename to projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/models/Qleaning_model.pkl diff --git a/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/test_ma_rewards.npy b/projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/test_ma_rewards.npy similarity index 100% rename from codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/test_ma_rewards.npy rename to 
projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/test_ma_rewards.npy diff --git a/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/test_rewards.npy b/projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/test_rewards.npy similarity index 100% rename from codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/test_rewards.npy rename to projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/test_rewards.npy diff --git a/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/test_rewards_curve.png b/projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/test_rewards_curve.png similarity index 100% rename from codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/test_rewards_curve.png rename to projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/test_rewards_curve.png diff --git a/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/train_ma_rewards.npy b/projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/train_ma_rewards.npy similarity index 100% rename from codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/train_ma_rewards.npy rename to projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/train_ma_rewards.npy diff --git a/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/train_rewards.npy b/projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/train_rewards.npy similarity index 100% rename from codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/train_rewards.npy rename to projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/train_rewards.npy diff --git a/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/train_rewards_curve.png b/projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/train_rewards_curve.png similarity index 100% rename from 
codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/train_rewards_curve.png rename to projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/train_rewards_curve.png diff --git a/codes/QLearning/qlearning.py b/projects/codes/QLearning/qlearning.py similarity index 100% rename from codes/QLearning/qlearning.py rename to projects/codes/QLearning/qlearning.py diff --git a/codes/QLearning/task0.py b/projects/codes/QLearning/task0.py similarity index 100% rename from codes/QLearning/task0.py rename to projects/codes/QLearning/task0.py diff --git a/codes/RainbowDQN/rainbow_dqn.py b/projects/codes/RainbowDQN/rainbow_dqn.py similarity index 100% rename from codes/RainbowDQN/rainbow_dqn.py rename to projects/codes/RainbowDQN/rainbow_dqn.py diff --git a/codes/RainbowDQN/task0.py b/projects/codes/RainbowDQN/task0.py similarity index 100% rename from codes/RainbowDQN/task0.py rename to projects/codes/RainbowDQN/task0.py diff --git a/codes/Sarsa/README.md b/projects/codes/Sarsa/README.md similarity index 100% rename from codes/Sarsa/README.md rename to projects/codes/Sarsa/README.md diff --git a/codes/Sarsa/assets/sarsa_algo.png b/projects/codes/Sarsa/assets/sarsa_algo.png similarity index 100% rename from codes/Sarsa/assets/sarsa_algo.png rename to projects/codes/Sarsa/assets/sarsa_algo.png diff --git a/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/models/sarsa_model.pkl b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/models/sarsa_model.pkl similarity index 100% rename from codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/models/sarsa_model.pkl rename to projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/models/sarsa_model.pkl diff --git a/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/test_ma_rewards.npy b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/test_ma_rewards.npy similarity index 100% rename from 
codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/test_ma_rewards.npy rename to projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/test_ma_rewards.npy diff --git a/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/test_rewards.npy b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/test_rewards.npy similarity index 100% rename from codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/test_rewards.npy rename to projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/test_rewards.npy diff --git a/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/test_rewards_curve.png b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/test_rewards_curve.png similarity index 100% rename from codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/test_rewards_curve.png rename to projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/test_rewards_curve.png diff --git a/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/train_ma_rewards.npy b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/train_ma_rewards.npy similarity index 100% rename from codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/train_ma_rewards.npy rename to projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/train_ma_rewards.npy diff --git a/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/train_rewards.npy b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/train_rewards.npy similarity index 100% rename from codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/train_rewards.npy rename to projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/train_rewards.npy diff --git a/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/train_rewards_curve.png b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/train_rewards_curve.png similarity index 100% rename from 
codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/train_rewards_curve.png rename to projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/train_rewards_curve.png diff --git a/codes/Sarsa/sarsa.py b/projects/codes/Sarsa/sarsa.py similarity index 100% rename from codes/Sarsa/sarsa.py rename to projects/codes/Sarsa/sarsa.py diff --git a/codes/Sarsa/task0.py b/projects/codes/Sarsa/task0.py similarity index 100% rename from codes/Sarsa/task0.py rename to projects/codes/Sarsa/task0.py diff --git a/codes/SoftActorCritic/env_wrapper.py b/projects/codes/SoftActorCritic/env_wrapper.py similarity index 100% rename from codes/SoftActorCritic/env_wrapper.py rename to projects/codes/SoftActorCritic/env_wrapper.py diff --git a/codes/SoftActorCritic/model.py b/projects/codes/SoftActorCritic/model.py similarity index 100% rename from codes/SoftActorCritic/model.py rename to projects/codes/SoftActorCritic/model.py diff --git a/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_policy b/projects/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_policy similarity index 100% rename from codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_policy rename to projects/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_policy diff --git a/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_policy_optimizer b/projects/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_policy_optimizer similarity index 100% rename from codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_policy_optimizer rename to projects/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_policy_optimizer diff --git a/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_soft_q b/projects/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_soft_q similarity index 100% rename from 
codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_soft_q rename to projects/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_soft_q diff --git a/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_soft_q_optimizer b/projects/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_soft_q_optimizer similarity index 100% rename from codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_soft_q_optimizer rename to projects/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_soft_q_optimizer diff --git a/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_value b/projects/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_value similarity index 100% rename from codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_value rename to projects/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_value diff --git a/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_value_optimizer b/projects/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_value_optimizer similarity index 100% rename from codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_value_optimizer rename to projects/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/models/sac_value_optimizer diff --git a/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/test_ma_rewards.npy b/projects/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/test_ma_rewards.npy similarity index 100% rename from codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/test_ma_rewards.npy rename to projects/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/test_ma_rewards.npy diff --git a/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/test_rewards.npy b/projects/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/test_rewards.npy 
similarity index 100% rename from codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/test_rewards.npy rename to projects/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/test_rewards.npy diff --git a/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/test_rewards_curve.png b/projects/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/test_rewards_curve.png similarity index 100% rename from codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/test_rewards_curve.png rename to projects/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/test_rewards_curve.png diff --git a/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/train_ma_rewards.npy b/projects/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/train_ma_rewards.npy similarity index 100% rename from codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/train_ma_rewards.npy rename to projects/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/train_ma_rewards.npy diff --git a/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/train_rewards.npy b/projects/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/train_rewards.npy similarity index 100% rename from codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/train_rewards.npy rename to projects/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/train_rewards.npy diff --git a/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/train_rewards_curve.png b/projects/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/train_rewards_curve.png similarity index 100% rename from codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/train_rewards_curve.png rename to projects/codes/SoftActorCritic/outputs/Pendulum-v1/20211222-162722/results/train_rewards_curve.png diff --git a/codes/SoftActorCritic/sac.py 
b/projects/codes/SoftActorCritic/sac.py similarity index 100% rename from codes/SoftActorCritic/sac.py rename to projects/codes/SoftActorCritic/sac.py diff --git a/codes/SoftActorCritic/task0.py b/projects/codes/SoftActorCritic/task0.py similarity index 100% rename from codes/SoftActorCritic/task0.py rename to projects/codes/SoftActorCritic/task0.py diff --git a/codes/SoftActorCritic/task0_train.ipynb b/projects/codes/SoftActorCritic/task0_train.ipynb similarity index 100% rename from codes/SoftActorCritic/task0_train.ipynb rename to projects/codes/SoftActorCritic/task0_train.ipynb diff --git a/codes/TD3/README.md b/projects/codes/TD3/README.md similarity index 100% rename from codes/TD3/README.md rename to projects/codes/TD3/README.md diff --git a/codes/TD3/agent.py b/projects/codes/TD3/agent.py similarity index 100% rename from codes/TD3/agent.py rename to projects/codes/TD3/agent.py diff --git a/codes/TD3/memory.py b/projects/codes/TD3/memory.py similarity index 100% rename from codes/TD3/memory.py rename to projects/codes/TD3/memory.py diff --git a/codes/TD3/outputs/HalfCheetah-v2/20210416-130341/models/td3_actor b/projects/codes/TD3/outputs/HalfCheetah-v2/20210416-130341/models/td3_actor similarity index 100% rename from codes/TD3/outputs/HalfCheetah-v2/20210416-130341/models/td3_actor rename to projects/codes/TD3/outputs/HalfCheetah-v2/20210416-130341/models/td3_actor diff --git a/codes/TD3/outputs/HalfCheetah-v2/20210416-130341/models/td3_actor_optimizer b/projects/codes/TD3/outputs/HalfCheetah-v2/20210416-130341/models/td3_actor_optimizer similarity index 100% rename from codes/TD3/outputs/HalfCheetah-v2/20210416-130341/models/td3_actor_optimizer rename to projects/codes/TD3/outputs/HalfCheetah-v2/20210416-130341/models/td3_actor_optimizer diff --git a/codes/TD3/outputs/HalfCheetah-v2/20210416-130341/models/td3_critic b/projects/codes/TD3/outputs/HalfCheetah-v2/20210416-130341/models/td3_critic similarity index 100% rename from 
codes/TD3/outputs/HalfCheetah-v2/20210416-130341/models/td3_critic rename to projects/codes/TD3/outputs/HalfCheetah-v2/20210416-130341/models/td3_critic diff --git a/codes/TD3/outputs/HalfCheetah-v2/20210416-130341/models/td3_critic_optimizer b/projects/codes/TD3/outputs/HalfCheetah-v2/20210416-130341/models/td3_critic_optimizer similarity index 100% rename from codes/TD3/outputs/HalfCheetah-v2/20210416-130341/models/td3_critic_optimizer rename to projects/codes/TD3/outputs/HalfCheetah-v2/20210416-130341/models/td3_critic_optimizer diff --git a/codes/TD3/outputs/HalfCheetah-v2/20210416-130341/results/ma_rewards_train.npy b/projects/codes/TD3/outputs/HalfCheetah-v2/20210416-130341/results/ma_rewards_train.npy similarity index 100% rename from codes/TD3/outputs/HalfCheetah-v2/20210416-130341/results/ma_rewards_train.npy rename to projects/codes/TD3/outputs/HalfCheetah-v2/20210416-130341/results/ma_rewards_train.npy diff --git a/codes/TD3/outputs/HalfCheetah-v2/20210416-130341/results/rewards_curve_train.png b/projects/codes/TD3/outputs/HalfCheetah-v2/20210416-130341/results/rewards_curve_train.png similarity index 100% rename from codes/TD3/outputs/HalfCheetah-v2/20210416-130341/results/rewards_curve_train.png rename to projects/codes/TD3/outputs/HalfCheetah-v2/20210416-130341/results/rewards_curve_train.png diff --git a/codes/TD3/outputs/HalfCheetah-v2/20210416-130341/results/rewards_train.npy b/projects/codes/TD3/outputs/HalfCheetah-v2/20210416-130341/results/rewards_train.npy similarity index 100% rename from codes/TD3/outputs/HalfCheetah-v2/20210416-130341/results/rewards_train.npy rename to projects/codes/TD3/outputs/HalfCheetah-v2/20210416-130341/results/rewards_train.npy diff --git a/codes/TD3/outputs/Pendulum-v1/20211119-123814/models/td3_actor b/projects/codes/TD3/outputs/Pendulum-v1/20211119-123814/models/td3_actor similarity index 100% rename from codes/TD3/outputs/Pendulum-v1/20211119-123814/models/td3_actor rename to 
projects/codes/TD3/outputs/Pendulum-v1/20211119-123814/models/td3_actor diff --git a/codes/TD3/outputs/Pendulum-v1/20211119-123814/models/td3_actor_optimizer b/projects/codes/TD3/outputs/Pendulum-v1/20211119-123814/models/td3_actor_optimizer similarity index 100% rename from codes/TD3/outputs/Pendulum-v1/20211119-123814/models/td3_actor_optimizer rename to projects/codes/TD3/outputs/Pendulum-v1/20211119-123814/models/td3_actor_optimizer diff --git a/codes/TD3/outputs/Pendulum-v1/20211119-123814/models/td3_critic b/projects/codes/TD3/outputs/Pendulum-v1/20211119-123814/models/td3_critic similarity index 100% rename from codes/TD3/outputs/Pendulum-v1/20211119-123814/models/td3_critic rename to projects/codes/TD3/outputs/Pendulum-v1/20211119-123814/models/td3_critic diff --git a/codes/TD3/outputs/Pendulum-v1/20211119-123814/models/td3_critic_optimizer b/projects/codes/TD3/outputs/Pendulum-v1/20211119-123814/models/td3_critic_optimizer similarity index 100% rename from codes/TD3/outputs/Pendulum-v1/20211119-123814/models/td3_critic_optimizer rename to projects/codes/TD3/outputs/Pendulum-v1/20211119-123814/models/td3_critic_optimizer diff --git a/codes/TD3/outputs/Pendulum-v1/20211119-123814/results/train_ma_rewards.npy b/projects/codes/TD3/outputs/Pendulum-v1/20211119-123814/results/train_ma_rewards.npy similarity index 100% rename from codes/TD3/outputs/Pendulum-v1/20211119-123814/results/train_ma_rewards.npy rename to projects/codes/TD3/outputs/Pendulum-v1/20211119-123814/results/train_ma_rewards.npy diff --git a/codes/TD3/outputs/Pendulum-v1/20211119-123814/results/train_rewards.npy b/projects/codes/TD3/outputs/Pendulum-v1/20211119-123814/results/train_rewards.npy similarity index 100% rename from codes/TD3/outputs/Pendulum-v1/20211119-123814/results/train_rewards.npy rename to projects/codes/TD3/outputs/Pendulum-v1/20211119-123814/results/train_rewards.npy diff --git a/codes/TD3/outputs/Pendulum-v1/20211119-123814/results/train_rewards_curve.png 
b/projects/codes/TD3/outputs/Pendulum-v1/20211119-123814/results/train_rewards_curve.png similarity index 100% rename from codes/TD3/outputs/Pendulum-v1/20211119-123814/results/train_rewards_curve.png rename to projects/codes/TD3/outputs/Pendulum-v1/20211119-123814/results/train_rewards_curve.png diff --git a/codes/TD3/outputs/Reacher-v2/20210415-021952/ma_rewards_train.npy b/projects/codes/TD3/outputs/Reacher-v2/20210415-021952/ma_rewards_train.npy similarity index 100% rename from codes/TD3/outputs/Reacher-v2/20210415-021952/ma_rewards_train.npy rename to projects/codes/TD3/outputs/Reacher-v2/20210415-021952/ma_rewards_train.npy diff --git a/codes/TD3/outputs/Reacher-v2/20210415-021952/rewards_curve_train.png b/projects/codes/TD3/outputs/Reacher-v2/20210415-021952/rewards_curve_train.png similarity index 100% rename from codes/TD3/outputs/Reacher-v2/20210415-021952/rewards_curve_train.png rename to projects/codes/TD3/outputs/Reacher-v2/20210415-021952/rewards_curve_train.png diff --git a/codes/TD3/outputs/Reacher-v2/20210415-021952/rewards_train.npy b/projects/codes/TD3/outputs/Reacher-v2/20210415-021952/rewards_train.npy similarity index 100% rename from codes/TD3/outputs/Reacher-v2/20210415-021952/rewards_train.npy rename to projects/codes/TD3/outputs/Reacher-v2/20210415-021952/rewards_train.npy diff --git a/codes/TD3/outputs/Reacher-v2/20210415-021952/td3_actor b/projects/codes/TD3/outputs/Reacher-v2/20210415-021952/td3_actor similarity index 100% rename from codes/TD3/outputs/Reacher-v2/20210415-021952/td3_actor rename to projects/codes/TD3/outputs/Reacher-v2/20210415-021952/td3_actor diff --git a/codes/TD3/outputs/Reacher-v2/20210415-021952/td3_actor_optimizer b/projects/codes/TD3/outputs/Reacher-v2/20210415-021952/td3_actor_optimizer similarity index 100% rename from codes/TD3/outputs/Reacher-v2/20210415-021952/td3_actor_optimizer rename to projects/codes/TD3/outputs/Reacher-v2/20210415-021952/td3_actor_optimizer diff --git 
a/codes/TD3/outputs/Reacher-v2/20210415-021952/td3_critic b/projects/codes/TD3/outputs/Reacher-v2/20210415-021952/td3_critic similarity index 100% rename from codes/TD3/outputs/Reacher-v2/20210415-021952/td3_critic rename to projects/codes/TD3/outputs/Reacher-v2/20210415-021952/td3_critic diff --git a/codes/TD3/outputs/Reacher-v2/20210415-021952/td3_critic_optimizer b/projects/codes/TD3/outputs/Reacher-v2/20210415-021952/td3_critic_optimizer similarity index 100% rename from codes/TD3/outputs/Reacher-v2/20210415-021952/td3_critic_optimizer rename to projects/codes/TD3/outputs/Reacher-v2/20210415-021952/td3_critic_optimizer diff --git a/codes/TD3/task0_eval.py b/projects/codes/TD3/task0_eval.py similarity index 100% rename from codes/TD3/task0_eval.py rename to projects/codes/TD3/task0_eval.py diff --git a/codes/TD3/task0_train.py b/projects/codes/TD3/task0_train.py similarity index 100% rename from codes/TD3/task0_train.py rename to projects/codes/TD3/task0_train.py diff --git a/codes/TD3/task1_eval.py b/projects/codes/TD3/task1_eval.py similarity index 100% rename from codes/TD3/task1_eval.py rename to projects/codes/TD3/task1_eval.py diff --git a/codes/TD3/task1_train.py b/projects/codes/TD3/task1_train.py similarity index 100% rename from codes/TD3/task1_train.py rename to projects/codes/TD3/task1_train.py diff --git a/codes/assets/image-20200820174307301.png b/projects/codes/assets/image-20200820174307301.png similarity index 100% rename from codes/assets/image-20200820174307301.png rename to projects/codes/assets/image-20200820174307301.png diff --git a/codes/assets/image-20200820174814084.png b/projects/codes/assets/image-20200820174814084.png similarity index 100% rename from codes/assets/image-20200820174814084.png rename to projects/codes/assets/image-20200820174814084.png diff --git a/codes/common/atari_wrappers.py b/projects/codes/common/atari_wrappers.py similarity index 100% rename from codes/common/atari_wrappers.py rename to 
projects/codes/common/atari_wrappers.py diff --git a/codes/common/memory.py b/projects/codes/common/memory.py similarity index 100% rename from codes/common/memory.py rename to projects/codes/common/memory.py diff --git a/codes/common/model.py b/projects/codes/common/model.py similarity index 100% rename from codes/common/model.py rename to projects/codes/common/model.py diff --git a/codes/common/multiprocessing_env.py b/projects/codes/common/multiprocessing_env.py similarity index 100% rename from codes/common/multiprocessing_env.py rename to projects/codes/common/multiprocessing_env.py diff --git a/codes/common/utils.py b/projects/codes/common/utils.py similarity index 83% rename from codes/common/utils.py rename to projects/codes/common/utils.py index 654b73c..9cc625e 100644 --- a/codes/common/utils.py +++ b/projects/codes/common/utils.py @@ -5,7 +5,7 @@ Author: John Email: johnjim0816@gmail.com Date: 2021-03-12 16:02:24 LastEditor: John -LastEditTime: 2022-07-21 21:45:33 +LastEditTime: 2022-07-31 23:18:04 Discription: Environment: ''' @@ -106,4 +106,21 @@ def save_args(args): with open(args.result_path+'params.json', 'w') as fp: json.dump(args_dict, fp) print("Parameters saved!") - \ No newline at end of file +def smooth(data, weight=0.9): + '''_summary_ + + Args: + data (List):输入数据 + weight (Float): 平滑权重,处于0-1之间,数值越高说明越平滑,一般取0.9 + + Returns: + smoothed (List): 平滑后的数据 + ''' + last = data[0] # First value in the plot (first timestep) + smoothed = list() + for point in data: + smoothed_val = last * weight + (1 - weight) * point # 计算平滑值 + smoothed.append(smoothed_val) + last = smoothed_val + + return smoothed \ No newline at end of file diff --git a/codes/common/wrappers.py b/projects/codes/common/wrappers.py similarity index 100% rename from codes/common/wrappers.py rename to projects/codes/common/wrappers.py diff --git a/codes/envs/README.md b/projects/codes/envs/README.md similarity index 100% rename from codes/envs/README.md rename to 
projects/codes/envs/README.md diff --git a/codes/envs/assets/action_grid.png b/projects/codes/envs/assets/action_grid.png similarity index 100% rename from codes/envs/assets/action_grid.png rename to projects/codes/envs/assets/action_grid.png diff --git a/codes/envs/assets/gym_info_20211130180023.png b/projects/codes/envs/assets/gym_info_20211130180023.png similarity index 100% rename from codes/envs/assets/gym_info_20211130180023.png rename to projects/codes/envs/assets/gym_info_20211130180023.png diff --git a/codes/envs/assets/image-20200820174307301.png b/projects/codes/envs/assets/image-20200820174307301.png similarity index 100% rename from codes/envs/assets/image-20200820174307301.png rename to projects/codes/envs/assets/image-20200820174307301.png diff --git a/codes/envs/assets/image-20200820174814084.png b/projects/codes/envs/assets/image-20200820174814084.png similarity index 100% rename from codes/envs/assets/image-20200820174814084.png rename to projects/codes/envs/assets/image-20200820174814084.png diff --git a/codes/envs/assets/image-20201007211441036.png b/projects/codes/envs/assets/image-20201007211441036.png similarity index 100% rename from codes/envs/assets/image-20201007211441036.png rename to projects/codes/envs/assets/image-20201007211441036.png diff --git a/codes/envs/assets/image-20201007211858925.png b/projects/codes/envs/assets/image-20201007211858925.png similarity index 100% rename from codes/envs/assets/image-20201007211858925.png rename to projects/codes/envs/assets/image-20201007211858925.png diff --git a/codes/envs/assets/image-20210429150622353.png b/projects/codes/envs/assets/image-20210429150622353.png similarity index 100% rename from codes/envs/assets/image-20210429150622353.png rename to projects/codes/envs/assets/image-20210429150622353.png diff --git a/codes/envs/assets/image-20210429150630806.png b/projects/codes/envs/assets/image-20210429150630806.png similarity index 100% rename from 
codes/envs/assets/image-20210429150630806.png rename to projects/codes/envs/assets/image-20210429150630806.png diff --git a/codes/envs/assets/track_big.png b/projects/codes/envs/assets/track_big.png similarity index 100% rename from codes/envs/assets/track_big.png rename to projects/codes/envs/assets/track_big.png diff --git a/codes/envs/blackjack.py b/projects/codes/envs/blackjack.py similarity index 100% rename from codes/envs/blackjack.py rename to projects/codes/envs/blackjack.py diff --git a/codes/envs/cliff_walking.py b/projects/codes/envs/cliff_walking.py similarity index 100% rename from codes/envs/cliff_walking.py rename to projects/codes/envs/cliff_walking.py diff --git a/codes/envs/gridworld.py b/projects/codes/envs/gridworld.py similarity index 100% rename from codes/envs/gridworld.py rename to projects/codes/envs/gridworld.py diff --git a/codes/envs/gym_info.md b/projects/codes/envs/gym_info.md similarity index 100% rename from codes/envs/gym_info.md rename to projects/codes/envs/gym_info.md diff --git a/codes/envs/mujoco_info.md b/projects/codes/envs/mujoco_info.md similarity index 100% rename from codes/envs/mujoco_info.md rename to projects/codes/envs/mujoco_info.md diff --git a/codes/envs/racetrack_env.md b/projects/codes/envs/racetrack_env.md similarity index 100% rename from codes/envs/racetrack_env.md rename to projects/codes/envs/racetrack_env.md diff --git a/codes/envs/racetrack_env.py b/projects/codes/envs/racetrack_env.py similarity index 100% rename from codes/envs/racetrack_env.py rename to projects/codes/envs/racetrack_env.py diff --git a/codes/envs/snake/README.md b/projects/codes/envs/snake/README.md similarity index 100% rename from codes/envs/snake/README.md rename to projects/codes/envs/snake/README.md diff --git a/codes/envs/snake/agent.py b/projects/codes/envs/snake/agent.py similarity index 100% rename from codes/envs/snake/agent.py rename to projects/codes/envs/snake/agent.py diff --git a/codes/envs/snake/checkpoint.npy 
b/projects/codes/envs/snake/checkpoint.npy similarity index 100% rename from codes/envs/snake/checkpoint.npy rename to projects/codes/envs/snake/checkpoint.npy diff --git a/codes/envs/snake/checkpoint1.npy b/projects/codes/envs/snake/checkpoint1.npy similarity index 100% rename from codes/envs/snake/checkpoint1.npy rename to projects/codes/envs/snake/checkpoint1.npy diff --git a/codes/envs/snake/checkpoint2.npy b/projects/codes/envs/snake/checkpoint2.npy similarity index 100% rename from codes/envs/snake/checkpoint2.npy rename to projects/codes/envs/snake/checkpoint2.npy diff --git a/codes/envs/snake/checkpoint3.npy b/projects/codes/envs/snake/checkpoint3.npy similarity index 100% rename from codes/envs/snake/checkpoint3.npy rename to projects/codes/envs/snake/checkpoint3.npy diff --git a/codes/envs/snake/example_assignment_and_report2.pdf b/projects/codes/envs/snake/example_assignment_and_report2.pdf similarity index 100% rename from codes/envs/snake/example_assignment_and_report2.pdf rename to projects/codes/envs/snake/example_assignment_and_report2.pdf diff --git a/codes/envs/snake/main.py b/projects/codes/envs/snake/main.py similarity index 100% rename from codes/envs/snake/main.py rename to projects/codes/envs/snake/main.py diff --git a/codes/envs/snake/q_agent.npy b/projects/codes/envs/snake/q_agent.npy similarity index 100% rename from codes/envs/snake/q_agent.npy rename to projects/codes/envs/snake/q_agent.npy diff --git a/codes/envs/snake/snake_env.py b/projects/codes/envs/snake/snake_env.py similarity index 100% rename from codes/envs/snake/snake_env.py rename to projects/codes/envs/snake/snake_env.py diff --git a/codes/envs/snake/utils.py b/projects/codes/envs/snake/utils.py similarity index 100% rename from codes/envs/snake/utils.py rename to projects/codes/envs/snake/utils.py diff --git a/codes/envs/stochastic_mdp.py b/projects/codes/envs/stochastic_mdp.py similarity index 100% rename from codes/envs/stochastic_mdp.py rename to 
projects/codes/envs/stochastic_mdp.py diff --git a/codes/envs/track.txt b/projects/codes/envs/track.txt similarity index 100% rename from codes/envs/track.txt rename to projects/codes/envs/track.txt diff --git a/codes/envs/windy_gridworld.py b/projects/codes/envs/windy_gridworld.py similarity index 100% rename from codes/envs/windy_gridworld.py rename to projects/codes/envs/windy_gridworld.py diff --git a/projects/environment.yaml b/projects/environment.yaml new file mode 100644 index 0000000..6915f5f --- /dev/null +++ b/projects/environment.yaml @@ -0,0 +1,124 @@ +name: rl_tutorials +channels: + - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch + - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/msys2 + - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/msys2/ + - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge + - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/ + - defaults +dependencies: + - backcall=0.2.0=pyh9f0ad1d_0 + - backports=1.0=py_2 + - backports.functools_lru_cache=1.6.4=pyhd8ed1ab_0 + - blas=2.114=mkl + - blas-devel=3.9.0=14_win64_mkl + - brotlipy=0.7.0=py37hcc03f2d_1004 + - ca-certificates=2022.5.18.1=h5b45459_0 + - certifi=2022.5.18.1=py37h03978a9_0 + - cffi=1.15.0=py37hd8e9650_0 + - charset-normalizer=2.0.12=pyhd8ed1ab_0 + - colorama=0.4.4=pyh9f0ad1d_0 + - cryptography=37.0.1=py37h21b164f_0 + - cudatoolkit=11.3.1=h59b6b97_2 + - debugpy=1.6.0=py37hf2a7229_0 + - decorator=5.1.1=pyhd8ed1ab_0 + - entrypoints=0.4=pyhd8ed1ab_0 + - freetype=2.10.4=h546665d_1 + - idna=3.3=pyhd8ed1ab_0 + - intel-openmp=2022.1.0=h57928b3_3787 + - ipykernel=6.13.0=py37h90c5f73_0 + - jedi=0.18.1=py37h03978a9_1 + - jpeg=9e=h8ffe710_1 + - jupyter_client=7.3.1=pyhd8ed1ab_0 + - jupyter_core=4.10.0=py37h03978a9_0 + - lcms2=2.12=h2a16943_0 + - lerc=3.0=h0e60522_0 + - libblas=3.9.0=14_win64_mkl + - libcblas=3.9.0=14_win64_mkl + - libdeflate=1.10=h8ffe710_0 + - liblapack=3.9.0=14_win64_mkl + - liblapacke=3.9.0=14_win64_mkl + - 
libpng=1.6.37=h1d00b33_2 + - libsodium=1.0.18=h8d14728_1 + - libtiff=4.3.0=hc4061b1_4 + - libuv=1.43.0=h8ffe710_0 + - libwebp=1.2.2=h57928b3_0 + - libwebp-base=1.2.2=h8ffe710_1 + - libxcb=1.13=hcd874cb_1004 + - libzlib=1.2.11=h8ffe710_1014 + - lz4-c=1.9.3=h8ffe710_1 + - m2w64-gcc-libgfortran=5.3.0=6 + - m2w64-gcc-libs=5.3.0=7 + - m2w64-gcc-libs-core=5.3.0=7 + - m2w64-gmp=6.1.0=2 + - m2w64-libwinpthread-git=5.0.0.4634.697f757=2 + - matplotlib-inline=0.1.3=pyhd8ed1ab_0 + - mkl=2022.0.0=h0e2418a_796 + - mkl-devel=2022.0.0=h57928b3_797 + - mkl-include=2022.0.0=h0e2418a_796 + - msys2-conda-epoch=20160418=1 + - nest-asyncio=1.5.5=pyhd8ed1ab_0 + - numpy=1.21.6=py37h2830a78_0 + - openjpeg=2.4.0=hb211442_1 + - openssl=3.0.3=h8ffe710_0 + - packaging=21.3=pyhd8ed1ab_0 + - parso=0.8.3=pyhd8ed1ab_0 + - pickleshare=0.7.5=py_1003 + - pillow=9.1.1=py37h8675073_0 + - pip=22.1.1=pyhd8ed1ab_0 + - prompt-toolkit=3.0.29=pyha770c72_0 + - psutil=5.9.1=py37hcc03f2d_0 + - pthread-stubs=0.4=hcd874cb_1001 + - pycparser=2.21=pyhd8ed1ab_0 + - pyopenssl=22.0.0=pyhd8ed1ab_0 + - pysocks=1.7.1=py37h03978a9_5 + - python=3.7.12=h900ac77_100_cpython + - python-dateutil=2.8.2=pyhd8ed1ab_0 + - python_abi=3.7=2_cp37m + - pytorch=1.11.0=py3.7_cuda11.3_cudnn8_0 + - pytorch-mutex=1.0=cuda + - pywin32=303=py37hcc03f2d_0 + - pyzmq=23.0.0=py37hcce574b_0 + - requests=2.27.1=pyhd8ed1ab_0 + - setuptools=62.3.2=py37h03978a9_0 + - six=1.16.0=pyh6c4a22f_0 + - sqlite=3.38.5=h8ffe710_0 + - tbb=2021.5.0=h2d74725_1 + - tk=8.6.12=h8ffe710_0 + - torchaudio=0.11.0=py37_cu113 + - torchvision=0.12.0=py37_cu113 + - tornado=6.1=py37hcc03f2d_3 + - typing_extensions=4.2.0=pyha770c72_1 + - ucrt=10.0.20348.0=h57928b3_0 + - urllib3=1.26.9=pyhd8ed1ab_0 + - vc=14.2=hb210afc_6 + - vs2015_runtime=14.29.30037=h902a5da_6 + - wcwidth=0.2.5=pyh9f0ad1d_2 + - wheel=0.37.1=pyhd8ed1ab_0 + - win_inet_pton=1.1.0=py37h03978a9_4 + - xorg-libxau=1.0.9=hcd874cb_0 + - xorg-libxdmcp=1.1.3=hcd874cb_0 + - xz=5.2.5=h62dcd97_1 + - zeromq=4.3.4=h0e60522_1 
+ - zlib=1.2.11=h8ffe710_1014 + - zstd=1.5.2=h6255e5f_1 + - pip: + - cloudpickle==2.1.0 + - cycler==0.11.0 + - dill==0.3.4 + - easydict==1.9 + - fonttools==4.33.2 + - gym==0.21.0 + - importlib-metadata==4.12.0 + - ipython==7.32.0 + - kiwisolver==1.4.2 + - matplotlib==3.5.1 + - pandas==1.3.5 + - pygments==2.11.2 + - pyparsing==3.0.8 + - pytz==2022.1 + - scipy==1.7.3 + - seaborn==0.11.2 + - traitlets==5.1.1 + - zipp==3.8.1 +prefix: C:\Users\24438\anaconda3\envs\rl_tutorials diff --git a/notebooks/A2C.ipynb b/projects/notebooks/A2C.ipynb similarity index 100% rename from notebooks/A2C.ipynb rename to projects/notebooks/A2C.ipynb diff --git a/notebooks/DQN.ipynb b/projects/notebooks/DQN.ipynb similarity index 100% rename from notebooks/DQN.ipynb rename to projects/notebooks/DQN.ipynb diff --git a/notebooks/QLearning.ipynb b/projects/notebooks/QLearning.ipynb similarity index 100% rename from notebooks/QLearning.ipynb rename to projects/notebooks/QLearning.ipynb diff --git a/notebooks/common/multiprocessing_env.py b/projects/notebooks/common/multiprocessing_env.py similarity index 100% rename from notebooks/common/multiprocessing_env.py rename to projects/notebooks/common/multiprocessing_env.py diff --git a/notebooks/figs/dqn_pseu.png b/projects/notebooks/figs/dqn_pseu.png similarity index 100% rename from notebooks/figs/dqn_pseu.png rename to projects/notebooks/figs/dqn_pseu.png