From ff23b5e7456c9fbf54851f94ec5e878fca1be3f9 Mon Sep 17 00:00:00 2001
From: weixin_46229132
Date: Wed, 19 Mar 2025 16:31:23 +0800
Subject: [PATCH] Adjust reward
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 PPO_Continuous/main.py | 4 +---
 env.py                 | 7 +++----
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/PPO_Continuous/main.py b/PPO_Continuous/main.py
index 8aded0a..d7d8e87 100644
--- a/PPO_Continuous/main.py
+++ b/PPO_Continuous/main.py
@@ -107,8 +107,6 @@ def main():
     # kwargs["a_lr"] *= 2
     # kwargs["c_lr"] *= 4
 
-    if not os.path.exists('model'):
-        os.mkdir('model')
     # transfer opt to dictionary, and use it to init PPO_agent
     agent = PPO_agent(**vars(opt))
     if opt.Loadmodel:
@@ -154,7 +152,7 @@ def main():
         if total_steps % opt.eval_interval == 0:
             # evaluate the policy for 3 times, and get averaged result
             score = evaluate_policy(
-                eval_env, agent, opt.max_action, turns=3)
+                eval_env, agent, opt.max_action, turns=1)
             if opt.write:
                 writer.add_scalar(
                     'ep_r', score, global_step=total_steps)
diff --git a/env.py b/env.py
index d7edea7..0ad2e18 100644
--- a/env.py
+++ b/env.py
@@ -40,7 +40,7 @@ class PartitionMazeEnv(gym.Env):
         # hyperparameters that may need manual tuning
         ##############################
         self.CUT_NUM = 4  # half of the cuts horizontal, half vertical
-        self.BASE_LINE = 4000  # baseline time, computed via greedy or Monte Carlo
+        self.BASE_LINE = 3500  # baseline time, computed via greedy or Monte Carlo
         self.MAX_STEPS = 50  # upper limit on the number of maze-walking steps
 
         self.phase = 0  # phase control: 0 = region partitioning, 1 = maze initialization, 2 = maze walking
@@ -290,10 +290,9 @@ class PartitionMazeEnv(gym.Env):
             # region coverage finished; compute each fleet's execution time from its trajectory
             T = max([self._compute_motorcade_time(idx)
                      for idx in range(self.num_cars)])
-            # print(T)
-            # print(self.partition_values)
-            # print(self.car_traj)
             reward += self.BASE_LINE / T * 1000
+            # reward += self.BASE_LINE - T
+            # print(reward)
         elif done and self.step_count >= self.MAX_STEPS:
             reward += -1000
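
The core of this commit is the terminal-reward adjustment in `env.py`: the ratio form `reward += self.BASE_LINE / T * 1000` is kept (with `BASE_LINE` lowered from 4000 to 3500), while a difference form `reward += self.BASE_LINE - T` is left commented out. The following is a minimal sketch, not part of the patch, that only illustrates how the two formulations scale with `T` (the slowest fleet's execution time); the sample values of `T` are hypothetical.

```python
# Standalone illustration of the two reward formulations from env.py.
BASE_LINE = 3500  # baseline time used after this patch


def reward_ratio(T: float) -> float:
    """Reward kept by this commit: scale-free ratio, equals 1000 when T == BASE_LINE."""
    return BASE_LINE / T * 1000


def reward_difference(T: float) -> float:
    """Commented-out alternative: linear in T, grows negative without bound as T increases."""
    return BASE_LINE - T


if __name__ == "__main__":
    # Hypothetical fleet execution times, only to show the scaling behaviour.
    for T in (3000.0, 3500.0, 4000.0, 7000.0):
        print(f"T={T:6.0f}  ratio={reward_ratio(T):8.1f}  diff={reward_difference(T):8.1f}")
```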