调整奖励
This commit is contained in:
parent
aecd86b245
commit
ee914ff930
12
PPO/env.py
12
PPO/env.py
@@ -64,7 +64,7 @@ class PartitionMazeEnv(gym.Env):
|
||||
self.BASE_LINE = 3500.0 # 基准时间,通过greedy或者蒙特卡洛计算出来
|
||||
self.step_count = 0
|
||||
self.rectangles = {}
|
||||
self.car_pos = [(0, 0) for _ in range(self.num_cars)]
|
||||
self.car_pos = [(0.5, 0.5) for _ in range(self.num_cars)]
|
||||
self.car_traj = [[] for _ in range(self.num_cars)]
|
||||
self.current_car_index = 0
|
||||
|
||||
@@ -79,7 +79,7 @@ class PartitionMazeEnv(gym.Env):
|
||||
self.region_centers = []
|
||||
self.step_count = 0
|
||||
self.rectangles = {}
|
||||
self.car_pos = [(0, 0) for _ in range(self.num_cars)]
|
||||
self.car_pos = [(0.5, 0.5) for _ in range(self.num_cars)]
|
||||
self.car_traj = [[] for _ in range(self.num_cars)]
|
||||
self.current_car_index = 0
|
||||
# 状态:前 4 维为 partition_values,其余补 0
|
||||
@@ -157,9 +157,6 @@ class PartitionMazeEnv(gym.Env):
|
||||
|
||||
# 进入阶段 1:初始化迷宫
|
||||
self.phase = 1
|
||||
# 所有车队从整个区域的中心出发
|
||||
self.car_pos = [(len(self.row_cuts) - 2 / 2, len(self.col_cuts) -2 / 2)
|
||||
for _ in range(self.num_cars)]
|
||||
state = np.concatenate(
|
||||
[self.partition_values, np.array(self.car_pos).flatten()])
|
||||
return state, reward, False, False, {}
|
||||
@@ -245,8 +242,9 @@ class PartitionMazeEnv(gym.Env):
|
||||
T = max([self._compute_motorcade_time(idx)
|
||||
for idx in range(self.num_cars)])
|
||||
# print(T)
|
||||
# print(self.partition_values)
|
||||
# print(self.car_traj)
|
||||
reward += -(T - self.BASE_LINE)
|
||||
reward += self.BASE_LINE / T * 100
|
||||
elif done and self.step_count >= self.MAX_STEPS:
|
||||
reward += -10000
|
||||
|
||||
@@ -277,6 +275,8 @@ class PartitionMazeEnv(gym.Env):
|
||||
print("Phase 1: Initialize maze environment.")
|
||||
print(f"Partition values so far: {self.partition_values}")
|
||||
print(f"Motorcade positon: {self.car_pos}")
|
||||
# input('1111')
|
||||
elif self.phase == 2:
|
||||
print("Phase 2: Play maze.")
|
||||
print(f'Motorcade trajectory: {self.car_traj}')
|
||||
# input('2222')
|
||||
|
Loading…
Reference in New Issue
Block a user