From 19f8b6246a6787ff6b1b7a9f6901c9e3e8d89308 Mon Sep 17 00:00:00 2001 From: weixin_46229132 Date: Tue, 18 Mar 2025 14:29:16 +0800 Subject: [PATCH] test --- DDPG_solver/main.py | 102 +++++++++++++++++++++++++++----------------- 1 file changed, 62 insertions(+), 40 deletions(-) diff --git a/DDPG_solver/main.py b/DDPG_solver/main.py index daa2c2c..79b87e7 100644 --- a/DDPG_solver/main.py +++ b/DDPG_solver/main.py @@ -1,44 +1,61 @@ -from utils import str2bool,evaluate_policy +from env import PartitionMazeEnv +from utils import str2bool, evaluate_policy from datetime import datetime from DDPG import DDPG_agent import gymnasium as gym -import os, shutil +import os +import shutil import argparse import torch import sys sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from env import PartitionMazeEnv '''Hyperparameter Setting''' parser = argparse.ArgumentParser() -parser.add_argument('--dvc', type=str, default='cpu', help='running device: cuda or cpu') -parser.add_argument('--EnvIdex', type=int, default=0, help='PartitionMaze_DDPG, PV1, Lch_Cv2, Humanv4, HCv4, BWv3, BWHv3') -parser.add_argument('--write', type=str2bool, default=False, help='Use SummaryWriter to record the training') -parser.add_argument('--render', type=str2bool, default=False, help='Render or Not') -parser.add_argument('--Loadmodel', type=str2bool, default=False, help='Load pretrained model or Not') -parser.add_argument('--ModelIdex', type=int, default=100, help='which model to load') +parser.add_argument('--dvc', type=str, default='cpu', + help='running device: cuda or cpu') +parser.add_argument('--EnvIdex', type=int, default=0, + help='PartitionMaze_DDPG, PV1, Lch_Cv2, Humanv4, HCv4, BWv3, BWHv3') +parser.add_argument('--write', type=str2bool, default=False, + help='Use SummaryWriter to record the training') +parser.add_argument('--render', type=str2bool, + default=False, help='Render or Not') +parser.add_argument('--Loadmodel', type=str2bool, + default=False, help='Load pretrained model or Not') +parser.add_argument('--ModelIdex', type=int, default=100, + help='which model to load') parser.add_argument('--seed', type=int, default=42, help='random seed') -parser.add_argument('--Max_train_steps', type=int, default=5e8, help='Max training steps') -parser.add_argument('--save_interval', type=int, default=1e5, help='Model saving interval, in steps.') -parser.add_argument('--eval_interval', type=int, default=2e3, help='Model evaluating interval, in steps.') +parser.add_argument('--Max_train_steps', type=int, + default=5e8, help='Max training steps') +parser.add_argument('--save_interval', type=int, default=1e5, + help='Model saving interval, in steps.') +parser.add_argument('--eval_interval', type=int, default=2e3, + help='Model evaluating interval, in steps.') -parser.add_argument('--gamma', type=float, default=0.99, help='Discounted Factor') -parser.add_argument('--net_width', type=int, default=400, help='Hidden net width, s_dim-400-300-a_dim') -parser.add_argument('--a_lr', type=float, default=1e-3, help='Learning rate of actor') -parser.add_argument('--c_lr', type=float, default=1e-3, help='Learning rate of critic') -parser.add_argument('--batch_size', type=int, default=128, help='batch_size of training') -parser.add_argument('--random_steps', type=int, default=5e4, help='random steps before trianing') +parser.add_argument('--gamma', type=float, default=0.99, + help='Discounted Factor') +parser.add_argument('--net_width', type=int, default=400, + help='Hidden net width, s_dim-400-300-a_dim') 
+parser.add_argument('--a_lr', type=float, default=1e-3, + help='Learning rate of actor') +parser.add_argument('--c_lr', type=float, default=1e-3, + help='Learning rate of critic') +parser.add_argument('--batch_size', type=int, default=128, + help='batch_size of training') +parser.add_argument('--random_steps', type=int, default=5e4, + help='random steps before trianing') parser.add_argument('--noise', type=float, default=0.1, help='exploring noise') opt = parser.parse_args() -opt.dvc = torch.device(opt.dvc) # from str to torch.device +opt.dvc = torch.device(opt.dvc) # from str to torch.device print(opt) def main(): - EnvName = ['Pendulum-v1','LunarLanderContinuous-v2','Humanoid-v4','HalfCheetah-v4','BipedalWalker-v3','BipedalWalkerHardcore-v3'] - BrifEnvName = ['PV1', 'LLdV2', 'Humanv4', 'HCv4','BWv3', 'BWHv3'] + EnvName = ['Pendulum-v1', 'LunarLanderContinuous-v2', 'Humanoid-v4', + 'HalfCheetah-v4', 'BipedalWalker-v3', 'BipedalWalkerHardcore-v3'] + BrifEnvName = ['PV1', 'LLdV2', 'Humanv4', 'HCv4', 'BWv3', 'BWHv3'] # Build Env # env = gym.make(EnvName[opt.EnvIdex], render_mode = "human" if opt.render else None) @@ -47,8 +64,10 @@ def main(): eval_env = PartitionMazeEnv() opt.state_dim = env.observation_space.shape[0] opt.action_dim = env.action_space.shape[0] - opt.max_action = float(env.action_space.high[0]) #remark: action space【-max,max】 - print(f'Env:{EnvName[opt.EnvIdex]} state_dim:{opt.state_dim} action_dim:{opt.action_dim}') + # remark: action space【-max,max】 + opt.max_action = float(env.action_space.high[0]) + print( + f'Env:{EnvName[opt.EnvIdex]} state_dim:{opt.state_dim} action_dim:{opt.action_dim}') print(f'max_a:{opt.max_action} min_a:{env.action_space.low[0]}') # Seed Everything @@ -65,14 +84,16 @@ def main(): timenow = str(datetime.now())[0:-10] timenow = ' ' + timenow[0:13] + '_' + timenow[-2::] writepath = 'logs/{}'.format(BrifEnvName[opt.EnvIdex]) + timenow - if os.path.exists(writepath): shutil.rmtree(writepath) + if os.path.exists(writepath): + shutil.rmtree(writepath) writer = SummaryWriter(log_dir=writepath) - # Build DRL model - if not os.path.exists('weights'): os.mkdir('weights') - agent = DDPG_agent(**vars(opt)) # var: transfer argparse to dictionary - if opt.Loadmodel: agent.load(BrifEnvName[opt.EnvIdex], opt.ModelIdex) + if not os.path.exists('weights'): + os.mkdir('weights') + agent = DDPG_agent(**vars(opt)) # var: transfer argparse to dictionary + if opt.Loadmodel: + agent.load(BrifEnvName[opt.EnvIdex], opt.ModelIdex) if opt.render: while True: @@ -81,15 +102,17 @@ def main(): else: total_steps = 0 while total_steps < opt.Max_train_steps: - s = env.reset(seed=env_seed) # Do not use opt.seed directly, or it can overfit to opt.seed - env_seed += 1 + s = env.reset() done = False '''Interact & trian''' - while not done: - if total_steps < opt.random_steps: a = env.action_space.sample() - else: a = agent.select_action(s, deterministic=False) - s_next, r, dw, tr, info = env.step(a) # dw: dead&win; tr: truncated + while not done: + if total_steps < opt.random_steps: + a = env.action_space.sample() + else: + a = agent.select_action(s, deterministic=False) + s_next, r, dw, tr, info = env.step( + a) # dw: dead&win; tr: truncated done = (dw or tr) agent.replay_buffer.add(s, a, r, s_next, dw) @@ -103,8 +126,11 @@ def main(): '''record & log''' if total_steps % opt.eval_interval == 0: ep_r = evaluate_policy(eval_env, agent, turns=3) - if opt.write: writer.add_scalar('ep_r', ep_r, global_step=total_steps) - print(f'EnvName:{BrifEnvName[opt.EnvIdex]}, Steps: 
{int(total_steps/1000)}k, Episode Reward:{ep_r}') + if opt.write: + writer.add_scalar( + 'ep_r', ep_r, global_step=total_steps) + print( + f'EnvName:{BrifEnvName[opt.EnvIdex]}, Steps: {int(total_steps/1000)}k, Episode Reward:{ep_r}') '''save model''' if total_steps % opt.save_interval == 0: @@ -115,7 +141,3 @@ def main(): if __name__ == '__main__': main() - - - -
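
For context, the training loop in the patched main.py follows the standard Gymnasium interaction pattern: reset the env, step it with an action, and treat the episode as finished when either the terminated ("dead & win", dw) or truncated (tr) flag is set. Below is a minimal, self-contained sketch of that pattern, not part of the patch; it uses the off-the-shelf Pendulum-v1 env, a tiny step budget, a plain list as a stand-in replay buffer, and random actions in place of DDPG_agent — all of these are illustrative assumptions.

import gymnasium as gym

env = gym.make('Pendulum-v1')
replay_buffer = []                       # stand-in for agent.replay_buffer
total_steps, max_train_steps = 0, 1000   # tiny budget, for illustration only

while total_steps < max_train_steps:
    s, info = env.reset()                # Gymnasium reset returns (obs, info)
    done = False
    while not done:
        a = env.action_space.sample()    # placeholder for agent.select_action(s, deterministic=False)
        s_next, r, dw, tr, info = env.step(a)  # 5-tuple: obs, reward, terminated, truncated, info
        done = (dw or tr)                # episode ends on termination or truncation
        replay_buffer.append((s, a, r, s_next, dw))
        s = s_next
        total_steps += 1

env.close()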
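The line `agent = DDPG_agent(**vars(opt))` in the patch relies on vars() turning the argparse Namespace into a plain dict so every hyperparameter becomes a keyword argument of the agent constructor. A minimal sketch of that idiom follows, using a hypothetical ToyAgent class — the class, its fields, and the argument set are illustrative, not the repo's DDPG_agent.

import argparse

class ToyAgent:
    def __init__(self, gamma, net_width, **kwargs):  # extra keys are absorbed by **kwargs
        self.gamma = gamma
        self.net_width = net_width

parser = argparse.ArgumentParser()
parser.add_argument('--gamma', type=float, default=0.99)
parser.add_argument('--net_width', type=int, default=400)
opt = parser.parse_args([])              # parse defaults here; real argv in practice
agent = ToyAgent(**vars(opt))            # vars(Namespace) -> dict, unpacked as kwargs
print(agent.gamma, agent.net_width)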