test

parent 75e5237272
commit 19f8b6246a
@@ -1,44 +1,61 @@
-from utils import str2bool,evaluate_policy
-from env import PartitionMazeEnv
+from utils import str2bool, evaluate_policy
 from datetime import datetime
 from DDPG import DDPG_agent
 import gymnasium as gym
-import os, shutil
+import os
+import shutil
 import argparse
 import torch
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from env import PartitionMazeEnv
 
 
 '''Hyperparameter Setting'''
 parser = argparse.ArgumentParser()
-parser.add_argument('--dvc', type=str, default='cpu', help='running device: cuda or cpu')
-parser.add_argument('--EnvIdex', type=int, default=0, help='PartitionMaze_DDPG, PV1, Lch_Cv2, Humanv4, HCv4, BWv3, BWHv3')
-parser.add_argument('--write', type=str2bool, default=False, help='Use SummaryWriter to record the training')
-parser.add_argument('--render', type=str2bool, default=False, help='Render or Not')
-parser.add_argument('--Loadmodel', type=str2bool, default=False, help='Load pretrained model or Not')
-parser.add_argument('--ModelIdex', type=int, default=100, help='which model to load')
+parser.add_argument('--dvc', type=str, default='cpu',
+                    help='running device: cuda or cpu')
+parser.add_argument('--EnvIdex', type=int, default=0,
+                    help='PartitionMaze_DDPG, PV1, Lch_Cv2, Humanv4, HCv4, BWv3, BWHv3')
+parser.add_argument('--write', type=str2bool, default=False,
+                    help='Use SummaryWriter to record the training')
+parser.add_argument('--render', type=str2bool,
+                    default=False, help='Render or Not')
+parser.add_argument('--Loadmodel', type=str2bool,
+                    default=False, help='Load pretrained model or Not')
+parser.add_argument('--ModelIdex', type=int, default=100,
+                    help='which model to load')
 
 parser.add_argument('--seed', type=int, default=42, help='random seed')
-parser.add_argument('--Max_train_steps', type=int, default=5e8, help='Max training steps')
-parser.add_argument('--save_interval', type=int, default=1e5, help='Model saving interval, in steps.')
-parser.add_argument('--eval_interval', type=int, default=2e3, help='Model evaluating interval, in steps.')
+parser.add_argument('--Max_train_steps', type=int,
+                    default=5e8, help='Max training steps')
+parser.add_argument('--save_interval', type=int, default=1e5,
+                    help='Model saving interval, in steps.')
+parser.add_argument('--eval_interval', type=int, default=2e3,
+                    help='Model evaluating interval, in steps.')
 
-parser.add_argument('--gamma', type=float, default=0.99, help='Discount Factor')
-parser.add_argument('--net_width', type=int, default=400, help='Hidden net width, s_dim-400-300-a_dim')
-parser.add_argument('--a_lr', type=float, default=1e-3, help='Learning rate of actor')
-parser.add_argument('--c_lr', type=float, default=1e-3, help='Learning rate of critic')
-parser.add_argument('--batch_size', type=int, default=128, help='batch_size of training')
-parser.add_argument('--random_steps', type=int, default=5e4, help='random steps before training')
+parser.add_argument('--gamma', type=float, default=0.99,
+                    help='Discount Factor')
+parser.add_argument('--net_width', type=int, default=400,
+                    help='Hidden net width, s_dim-400-300-a_dim')
+parser.add_argument('--a_lr', type=float, default=1e-3,
+                    help='Learning rate of actor')
+parser.add_argument('--c_lr', type=float, default=1e-3,
+                    help='Learning rate of critic')
+parser.add_argument('--batch_size', type=int, default=128,
+                    help='batch_size of training')
+parser.add_argument('--random_steps', type=int, default=5e4,
+                    help='random steps before training')
 parser.add_argument('--noise', type=float, default=0.1, help='exploring noise')
 opt = parser.parse_args()
-opt.dvc = torch.device(opt.dvc) # from str to torch.device
+opt.dvc = torch.device(opt.dvc)  # from str to torch.device
 print(opt)
 
 
 def main():
-    EnvName = ['Pendulum-v1','LunarLanderContinuous-v2','Humanoid-v4','HalfCheetah-v4','BipedalWalker-v3','BipedalWalkerHardcore-v3']
-    BrifEnvName = ['PV1', 'LLdV2', 'Humanv4', 'HCv4','BWv3', 'BWHv3']
+    EnvName = ['Pendulum-v1', 'LunarLanderContinuous-v2', 'Humanoid-v4',
+               'HalfCheetah-v4', 'BipedalWalker-v3', 'BipedalWalkerHardcore-v3']
+    BrifEnvName = ['PV1', 'LLdV2', 'Humanv4', 'HCv4', 'BWv3', 'BWHv3']
 
     # Build Env
     # env = gym.make(EnvName[opt.EnvIdex], render_mode = "human" if opt.render else None)
@@ -47,8 +64,10 @@ def main():
     eval_env = PartitionMazeEnv()
     opt.state_dim = env.observation_space.shape[0]
     opt.action_dim = env.action_space.shape[0]
-    opt.max_action = float(env.action_space.high[0]) #remark: action space【-max,max】
-    print(f'Env:{EnvName[opt.EnvIdex]} state_dim:{opt.state_dim} action_dim:{opt.action_dim}')
+    # remark: action space【-max,max】
+    opt.max_action = float(env.action_space.high[0])
+    print(
+        f'Env:{EnvName[opt.EnvIdex]} state_dim:{opt.state_dim} action_dim:{opt.action_dim}')
     print(f'max_a:{opt.max_action} min_a:{env.action_space.low[0]}')
 
     # Seed Everything
@@ -65,14 +84,16 @@ def main():
         timenow = str(datetime.now())[0:-10]
         timenow = ' ' + timenow[0:13] + '_' + timenow[-2::]
         writepath = 'logs/{}'.format(BrifEnvName[opt.EnvIdex]) + timenow
-        if os.path.exists(writepath): shutil.rmtree(writepath)
+        if os.path.exists(writepath):
+            shutil.rmtree(writepath)
         writer = SummaryWriter(log_dir=writepath)
 
 
     # Build DRL model
-    if not os.path.exists('weights'): os.mkdir('weights')
-    agent = DDPG_agent(**vars(opt)) # var: transfer argparse to dictionary
-    if opt.Loadmodel: agent.load(BrifEnvName[opt.EnvIdex], opt.ModelIdex)
+    if not os.path.exists('weights'):
+        os.mkdir('weights')
+    agent = DDPG_agent(**vars(opt))  # var: transfer argparse to dictionary
+    if opt.Loadmodel:
+        agent.load(BrifEnvName[opt.EnvIdex], opt.ModelIdex)
 
     if opt.render:
         while True:
@@ -81,15 +102,17 @@ def main():
     else:
         total_steps = 0
         while total_steps < opt.Max_train_steps:
-            s = env.reset(seed=env_seed) # Do not use opt.seed directly, or it can overfit to opt.seed
-            env_seed += 1
+            s = env.reset()
             done = False
 
             '''Interact & train'''
-            while not done:
-                if total_steps < opt.random_steps: a = env.action_space.sample()
-                else: a = agent.select_action(s, deterministic=False)
-                s_next, r, dw, tr, info = env.step(a) # dw: dead&win; tr: truncated
+            while not done:
+                if total_steps < opt.random_steps:
+                    a = env.action_space.sample()
+                else:
+                    a = agent.select_action(s, deterministic=False)
+                s_next, r, dw, tr, info = env.step(
+                    a)  # dw: dead&win; tr: truncated
                 done = (dw or tr)
 
                 agent.replay_buffer.add(s, a, r, s_next, dw)
@@ -103,8 +126,11 @@ def main():
                 '''record & log'''
                 if total_steps % opt.eval_interval == 0:
                     ep_r = evaluate_policy(eval_env, agent, turns=3)
-                    if opt.write: writer.add_scalar('ep_r', ep_r, global_step=total_steps)
-                    print(f'EnvName:{BrifEnvName[opt.EnvIdex]}, Steps: {int(total_steps/1000)}k, Episode Reward:{ep_r}')
+                    if opt.write:
+                        writer.add_scalar(
+                            'ep_r', ep_r, global_step=total_steps)
+                    print(
+                        f'EnvName:{BrifEnvName[opt.EnvIdex]}, Steps: {int(total_steps/1000)}k, Episode Reward:{ep_r}')
 
                 '''save model'''
                 if total_steps % opt.save_interval == 0:
@@ -115,7 +141,3 @@ def main():
 
 if __name__ == '__main__':
     main()
-
-
-
-
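
For readers checking the flags above: str2bool is imported from utils, but its body is not part of this diff. A minimal sketch of the usual pattern for such an argparse helper, assuming this repository follows it (this is not the repo's actual utils.str2bool):

    import argparse

    def str2bool(v):
        # Hypothetical sketch: map common string spellings of a boolean to a
        # real bool, so a flag like `--write False` parses to Python False
        # rather than the truthy string 'False'.
        if isinstance(v, bool):
            return v
        if v.lower() in ('yes', 'true', 't', 'y', '1'):
            return True
        if v.lower() in ('no', 'false', 'f', 'n', '0'):
            return False
        raise argparse.ArgumentTypeError('Boolean value expected.')

With a helper of this shape, a short CPU smoke run of the reformatted script could look like `python main.py --dvc cpu --write False --Max_train_steps 10000` (the filename main.py is an assumption; it is not shown on this page, and all flags used are defined in the argparse block above).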