调整奖励

This commit is contained in:
weixin_46229132 2025-03-13 15:55:14 +08:00
parent aecd86b245
commit ee914ff930

View File

@ -64,7 +64,7 @@ class PartitionMazeEnv(gym.Env):
self.BASE_LINE = 3500.0 # 基准时间通过greedy或者蒙特卡洛计算出来
self.step_count = 0
self.rectangles = {}
self.car_pos = [(0, 0) for _ in range(self.num_cars)]
self.car_pos = [(0.5, 0.5) for _ in range(self.num_cars)]
self.car_traj = [[] for _ in range(self.num_cars)]
self.current_car_index = 0
@ -79,7 +79,7 @@ class PartitionMazeEnv(gym.Env):
self.region_centers = []
self.step_count = 0
self.rectangles = {}
self.car_pos = [(0, 0) for _ in range(self.num_cars)]
self.car_pos = [(0.5, 0.5) for _ in range(self.num_cars)]
self.car_traj = [[] for _ in range(self.num_cars)]
self.current_car_index = 0
# 状态:前 4 维为 partition_values,其余补 0
@ -157,9 +157,6 @@ class PartitionMazeEnv(gym.Env):
# 进入阶段 1,初始化迷宫
self.phase = 1
# 所有车队从整个区域的中心出发
self.car_pos = [(len(self.row_cuts) - 2 / 2, len(self.col_cuts) -2 / 2)
for _ in range(self.num_cars)]
state = np.concatenate(
[self.partition_values, np.array(self.car_pos).flatten()])
return state, reward, False, False, {}
@ -245,8 +242,9 @@ class PartitionMazeEnv(gym.Env):
T = max([self._compute_motorcade_time(idx)
for idx in range(self.num_cars)])
# print(T)
# print(self.partition_values)
# print(self.car_traj)
reward += -(T - self.BASE_LINE)
reward += self.BASE_LINE / T * 100
elif done and self.step_count >= self.MAX_STEPS:
reward += -10000
@ -277,6 +275,8 @@ class PartitionMazeEnv(gym.Env):
print("Phase 1: Initialize maze environment.")
print(f"Partition values so far: {self.partition_values}")
print(f"Motorcade positon: {self.car_pos}")
# input('1111')
elif self.phase == 2:
print("Phase 2: Play maze.")
print(f'Motorcade trajectory: {self.car_traj}')
# input('2222')