From ff23b5e7456c9fbf54851f94ec5e878fca1be3f9 Mon Sep 17 00:00:00 2001
From: weixin_46229132
Date: Wed, 19 Mar 2025 16:31:23 +0800
Subject: [PATCH] Adjust reward
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 PPO_Continuous/main.py | 4 +---
 env.py                 | 7 +++----
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/PPO_Continuous/main.py b/PPO_Continuous/main.py
index 8aded0a..d7d8e87 100644
--- a/PPO_Continuous/main.py
+++ b/PPO_Continuous/main.py
@@ -107,8 +107,6 @@ def main():
     # kwargs["a_lr"] *= 2
     # kwargs["c_lr"] *= 4
 
-    if not os.path.exists('model'):
-        os.mkdir('model')
     # transfer opt to dictionary, and use it to init PPO_agent
     agent = PPO_agent(**vars(opt))
     if opt.Loadmodel:
@@ -154,7 +152,7 @@ def main():
         if total_steps % opt.eval_interval == 0:
             # evaluate the policy for 3 times, and get averaged result
             score = evaluate_policy(
-                eval_env, agent, opt.max_action, turns=3)
+                eval_env, agent, opt.max_action, turns=1)
             if opt.write:
                 writer.add_scalar(
                     'ep_r', score, global_step=total_steps)
diff --git a/env.py b/env.py
index d7edea7..0ad2e18 100644
--- a/env.py
+++ b/env.py
@@ -40,7 +40,7 @@ class PartitionMazeEnv(gym.Env):
         # hyperparameters that may need manual tuning
         ##############################
         self.CUT_NUM = 4  # half of the cuts horizontal, half vertical
-        self.BASE_LINE = 4000  # baseline time, computed via greedy or Monte Carlo
+        self.BASE_LINE = 3500  # baseline time, computed via greedy or Monte Carlo
         self.MAX_STEPS = 50  # upper limit on the number of maze-walking steps
 
         self.phase = 0  # phase control: 0 = region partitioning, 1 = maze initialization, 2 = maze walking
@@ -290,10 +290,9 @@ class PartitionMazeEnv(gym.Env):
             # region coverage finished; compute each fleet's execution time from its trajectory
             T = max([self._compute_motorcade_time(idx)
                      for idx in range(self.num_cars)])
-            # print(T)
-            # print(self.partition_values)
-            # print(self.car_traj)
             reward += self.BASE_LINE / T * 1000
+            # reward += self.BASE_LINE - T
+            # print(reward)
         elif done and self.step_count >= self.MAX_STEPS:
             reward += -1000
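
The core of this commit is the terminal-reward adjustment in `env.py`: the ratio form `reward += self.BASE_LINE / T * 1000` is kept (with `BASE_LINE` lowered from 4000 to 3500), while a difference form `reward += self.BASE_LINE - T` is left commented out. The following is a minimal sketch, not part of the patch, that only illustrates how the two formulations scale with `T` (the slowest fleet's execution time); the sample values of `T` are hypothetical.

```python
# Standalone illustration of the two reward formulations from env.py.
BASE_LINE = 3500  # baseline time used after this patch


def reward_ratio(T: float) -> float:
    """Reward kept by this commit: scale-free ratio, equals 1000 when T == BASE_LINE."""
    return BASE_LINE / T * 1000


def reward_difference(T: float) -> float:
    """Commented-out alternative: linear in T, grows negative without bound as T increases."""
    return BASE_LINE - T


if __name__ == "__main__":
    # Hypothetical fleet execution times, only to show the scaling behaviour.
    for T in (3000.0, 3500.0, 4000.0, 7000.0):
        print(f"T={T:6.0f}  ratio={reward_ratio(T):8.1f}  diff={reward_difference(T):8.1f}")
```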