diff --git a/PPO/env.py b/PPO/env.py index c09014e..08f5861 100644 --- a/PPO/env.py +++ b/PPO/env.py @@ -64,7 +64,7 @@ class PartitionMazeEnv(gym.Env): self.BASE_LINE = 3500.0 # 基准时间,通过greedy或者蒙特卡洛计算出来 self.step_count = 0 self.rectangles = {} - self.car_pos = [(0, 0) for _ in range(self.num_cars)] + self.car_pos = [(0.5, 0.5) for _ in range(self.num_cars)] self.car_traj = [[] for _ in range(self.num_cars)] self.current_car_index = 0 @@ -79,7 +79,7 @@ class PartitionMazeEnv(gym.Env): self.region_centers = [] self.step_count = 0 self.rectangles = {} - self.car_pos = [(0, 0) for _ in range(self.num_cars)] + self.car_pos = [(0.5, 0.5) for _ in range(self.num_cars)] self.car_traj = [[] for _ in range(self.num_cars)] self.current_car_index = 0 # 状态:前 4 维为 partition_values,其余补 0 @@ -157,9 +157,6 @@ class PartitionMazeEnv(gym.Env): # 进入阶段 1:初始化迷宫 self.phase = 1 - # 所有车队从整个区域的中心出发 - self.car_pos = [(len(self.row_cuts) - 2 / 2, len(self.col_cuts) -2 / 2) - for _ in range(self.num_cars)] state = np.concatenate( [self.partition_values, np.array(self.car_pos).flatten()]) return state, reward, False, False, {} @@ -245,8 +242,9 @@ class PartitionMazeEnv(gym.Env): T = max([self._compute_motorcade_time(idx) for idx in range(self.num_cars)]) # print(T) + # print(self.partition_values) # print(self.car_traj) - reward += -(T - self.BASE_LINE) + reward += self.BASE_LINE / T * 100 elif done and self.step_count >= self.MAX_STEPS: reward += -10000 @@ -277,6 +275,8 @@ class PartitionMazeEnv(gym.Env): print("Phase 1: Initialize maze environment.") print(f"Partition values so far: {self.partition_values}") print(f"Motorcade positon: {self.car_pos}") + # input('1111') elif self.phase == 2: print("Phase 2: Play maze.") print(f'Motorcade trajectory: {self.car_traj}') + # input('2222')