环境增加delay_time

This commit is contained in:
weixin_46229132 2025-03-22 09:47:52 +08:00
parent 5b468deb9d
commit a9ee5ceec7
2 changed files with 32 additions and 20 deletions

44
env.py
View File

@ -41,7 +41,7 @@ class PartitionMazeEnv(gym.Env):
############################## ##############################
self.CUT_NUM = 4 # 横切一半,竖切一半 self.CUT_NUM = 4 # 横切一半,竖切一半
self.BASE_LINE = 3500 # 基准时间通过greedy或者蒙特卡洛计算出来 self.BASE_LINE = 3500 # 基准时间通过greedy或者蒙特卡洛计算出来
self.MAX_STEPS = 50 # 迷宫走法步数上限 self.MAX_STEPS = 10 # 迷宫走法步数上限
self.phase = 0 # 阶段控制0区域划分阶段1迷宫初始化阶段2走迷宫阶段 self.phase = 0 # 阶段控制0区域划分阶段1迷宫初始化阶段2走迷宫阶段
self.partition_step = 0 # 区域划分阶段步数,范围 0~4 self.partition_step = 0 # 区域划分阶段步数,范围 0~4
@ -57,7 +57,7 @@ class PartitionMazeEnv(gym.Env):
# 阶段 1 状态:区域访问状态向量(长度为 (CUT_NUM/2+1)^2) # 阶段 1 状态:区域访问状态向量(长度为 (CUT_NUM/2+1)^2)
max_regions = (self.CUT_NUM // 2 + 1) ** 2 max_regions = (self.CUT_NUM // 2 + 1) ** 2
self.observation_space = spaces.Box( self.observation_space = spaces.Box(
low=0.0, high=100.0, shape=(self.CUT_NUM + max_regions,), dtype=np.float32) low=0.0, high=100.0, shape=(self.CUT_NUM + max_regions + 1,), dtype=np.float32)
# 切分阶段相关变量 # 切分阶段相关变量
self.col_cuts = [] # 存储竖切位置c₁, c₂当值为0时表示不切 self.col_cuts = [] # 存储竖切位置c₁, c₂当值为0时表示不切
@ -71,7 +71,7 @@ class PartitionMazeEnv(gym.Env):
self.car_pos = [(self.H / 2, self.W / 2) for _ in range(self.num_cars)] self.car_pos = [(self.H / 2, self.W / 2) for _ in range(self.num_cars)]
self.car_traj = [[] for _ in range(self.num_cars)] self.car_traj = [[] for _ in range(self.num_cars)]
self.current_car_index = 0 self.current_car_index = 0
self.previous_T = 0 self.delay_time = 0
def reset(self, seed=None, options=None): def reset(self, seed=None, options=None):
# 重置所有变量回到切分阶段phase 0 # 重置所有变量回到切分阶段phase 0
@ -87,12 +87,14 @@ class PartitionMazeEnv(gym.Env):
self.car_pos = [(self.H / 2, self.W / 2) for _ in range(self.num_cars)] self.car_pos = [(self.H / 2, self.W / 2) for _ in range(self.num_cars)]
self.car_traj = [[] for _ in range(self.num_cars)] self.car_traj = [[] for _ in range(self.num_cars)]
self.current_car_index = 0 self.current_car_index = 0
self.delay_time = 0
# 状态:前 4 维为 partition_values其余为区域访问状态初始全0 # 状态:前 4 维为 partition_values其余为区域访问状态初始全0
max_regions = (self.CUT_NUM // 2 + 1) ** 2 max_regions = (self.CUT_NUM // 2 + 1) ** 2
state = np.concatenate([ state = np.concatenate([
self.partition_values, self.partition_values,
np.zeros(max_regions, dtype=np.float32) np.zeros(max_regions, dtype=np.float32),
[0.0]
]) ])
return state return state
@ -110,7 +112,8 @@ class PartitionMazeEnv(gym.Env):
# 构造当前状态:前 partition_step 个为已决策值,其余为 0,再补 (CUT_NUM // 2 + 1)^2 个 0 作为区域访问状态 # 构造当前状态:前 partition_step 个为已决策值,其余为 0,再补 (CUT_NUM // 2 + 1)^2 个 0 作为区域访问状态
state = np.concatenate([ state = np.concatenate([
self.partition_values, self.partition_values,
np.zeros((self.CUT_NUM // 2 + 1) ** 2, dtype=np.float32) np.zeros((self.CUT_NUM // 2 + 1) ** 2, dtype=np.float32),
[0.0]
]) ])
# 如果未完成 4 步则仍处于切分阶段不发奖励done 为 False # 如果未完成 4 步则仍处于切分阶段不发奖励done 为 False
@ -164,7 +167,8 @@ class PartitionMazeEnv(gym.Env):
max_regions = (self.CUT_NUM // 2 + 1) ** 2 max_regions = (self.CUT_NUM // 2 + 1) ** 2
state = np.concatenate([ state = np.concatenate([
self.partition_values, self.partition_values,
np.zeros(max_regions, dtype=np.float32) np.zeros(max_regions, dtype=np.float32),
[0.0]
]) ])
return state, reward, True, False, {} return state, reward, True, False, {}
else: else:
@ -209,7 +213,7 @@ class PartitionMazeEnv(gym.Env):
for i in range(idx + 1, max_regions): for i in range(idx + 1, max_regions):
visit_status[i] = 100 visit_status[i] = 100
state = np.concatenate( state = np.concatenate(
[self.partition_values, visit_status]) [self.partition_values, visit_status, [0.0]])
return state, reward, False, False, {} return state, reward, False, False, {}
elif self.phase == 2: elif self.phase == 2:
@ -276,9 +280,14 @@ class PartitionMazeEnv(gym.Env):
# 新一轮的开始,初始化移动标记 # 新一轮的开始,初始化移动标记
self.cars_moved = [False] * self.num_cars self.cars_moved = [False] * self.num_cars
self.cars_moved[current_car] = car_moved self.cars_moved[current_car] = car_moved
# 计算当前的 T 值
current_T = max([self._compute_motorcade_time(idx)
for idx in range(self.num_cars)])
# 如果一轮结束,检查是否所有车辆都没有移动 # 如果一轮结束,检查是否所有车辆都没有移动
if self.current_car_index == (self.num_cars - 1) and not any(self.cars_moved): if self.current_car_index == (self.num_cars - 1) and not any(self.cars_moved):
reward -= 0.01 # 本轮所有车辆均未移动:delay_time 增加 BASE_LINE / MAX_STEPS
self.delay_time += self.BASE_LINE * (1 / self.MAX_STEPS)
real_T = current_T + self.delay_time
self.step_count += 1 self.step_count += 1
self.current_car_index = ( self.current_car_index = (
@ -297,18 +306,21 @@ class PartitionMazeEnv(gym.Env):
self.rectangles[(i, j)]['is_visited']) self.rectangles[(i, j)]['is_visited'])
for i in range(idx + 1, max_regions): for i in range(idx + 1, max_regions):
visit_status[i] = 100 visit_status[i] = 100
state = np.concatenate([self.partition_values, visit_status]) # 在状态向量最后增加一维,表示当前的 T 值
state = np.concatenate(
[self.partition_values, visit_status, [real_T]])
# Episode 终止条件:所有网格均被访问或步数达到上限 # Episode 终止条件:所有网格均被访问或步数达到上限
done = all([value['is_visited'] for _, value in self.rectangles.items()]) or ( done = all([value['is_visited'] for _, value in self.rectangles.items()]) or (
self.step_count >= self.MAX_STEPS) self.step_count >= self.MAX_STEPS)
if done and all([value['is_visited'] for _, value in self.rectangles.items()]): if done and all([value['is_visited'] for _, value in self.rectangles.items()]):
# 区域覆盖完毕,根据轨迹计算各车队的执行时间 # # 区域覆盖完毕,根据轨迹计算各车队的执行时间
T = max([self._compute_motorcade_time(idx) # T = max([self._compute_motorcade_time(idx)
for idx in range(self.num_cars)]) # for idx in range(self.num_cars)])
# TODO 让奖励在baseline附近变化更剧烈 # # TODO 让奖励在baseline附近变化更剧烈
# reward = math.exp(-T / self.BASE_LINE) * 1000 # # reward = math.exp(-T / self.BASE_LINE) * 1000
reward += self.BASE_LINE / T reward += self.BASE_LINE / real_T * 5
print(real_T, "="*20)
# if reward > self.BASE_LINE: # if reward > self.BASE_LINE:
# reward -= 200 # reward -= 200
@ -316,7 +328,7 @@ class PartitionMazeEnv(gym.Env):
# reward -= 10 * self.step_count # reward -= 10 * self.step_count
# TODO 动态调整baseline # TODO 动态调整baseline
elif done and self.step_count >= self.MAX_STEPS: elif done and self.step_count >= self.MAX_STEPS:
reward += -0.8 reward += -5
return state, reward, done, False, {} return state, reward, done, False, {}

View File

@ -1,13 +1,13 @@
# from env import PartitionMazeEnv from env import PartitionMazeEnv
from env_dis import PartitionMazeEnv # from env_dis import PartitionMazeEnv
env = PartitionMazeEnv() env = PartitionMazeEnv()
state = env.reset() state = env.reset()
print(state) print(state)
# action_series = [[0], [0], [0.4], [0], [0.1]] action_series = [[0.67], [0], [0], [0], [0.7]]
action_series = [0, 0, 3, 0, 10] # action_series = [0, 0, 3, 0, 10]
for i in range(100): for i in range(100):
action = action_series[i] action = action_series[i]