Add delay_time to the environment
parent 5b468deb9d
commit a9ee5ceec7

env.py
@@ -41,7 +41,7 @@ class PartitionMazeEnv(gym.Env):
         ##############################
         self.CUT_NUM = 4  # half of the cuts horizontal, half vertical
         self.BASE_LINE = 3500  # baseline time, computed via greedy or Monte Carlo
-        self.MAX_STEPS = 50  # upper bound on maze-walk steps
+        self.MAX_STEPS = 10  # upper bound on maze-walk steps

         self.phase = 0  # phase control, 0: partition phase, 1: maze init phase, 2: maze-walk phase
         self.partition_step = 0  # partition-phase step counter, range 0~4
@@ -57,7 +57,7 @@ class PartitionMazeEnv(gym.Env):
         # Phase-1 state: region visit-status vector (length (CUT_NUM/2+1)^2)
         max_regions = (self.CUT_NUM // 2 + 1) ** 2
         self.observation_space = spaces.Box(
-            low=0.0, high=100.0, shape=(self.CUT_NUM + max_regions,), dtype=np.float32)
+            low=0.0, high=100.0, shape=(self.CUT_NUM + max_regions + 1,), dtype=np.float32)

         # Partition-phase variables
         self.col_cuts = []  # vertical cut positions (c₁, c₂); a value of 0 means no cut
@@ -71,7 +71,7 @@ class PartitionMazeEnv(gym.Env):
         self.car_pos = [(self.H / 2, self.W / 2) for _ in range(self.num_cars)]
         self.car_traj = [[] for _ in range(self.num_cars)]
         self.current_car_index = 0
-        self.previous_T = 0
+        self.delay_time = 0

     def reset(self, seed=None, options=None):
         # Reset all variables and return to the partition phase (phase 0)
@@ -87,12 +87,14 @@ class PartitionMazeEnv(gym.Env):
         self.car_pos = [(self.H / 2, self.W / 2) for _ in range(self.num_cars)]
         self.car_traj = [[] for _ in range(self.num_cars)]
         self.current_car_index = 0
+        self.delay_time = 0

         # State: the first 4 dims are partition_values, the rest are the region visit status (initially all zeros)
         max_regions = (self.CUT_NUM // 2 + 1) ** 2
         state = np.concatenate([
             self.partition_values,
-            np.zeros(max_regions, dtype=np.float32)
+            np.zeros(max_regions, dtype=np.float32),
+            [0.0]
         ])
         return state

@@ -110,7 +112,8 @@ class PartitionMazeEnv(gym.Env):
             # Build the current state: the first partition_step entries are the decided values, the rest are 0, then pad with 7 zeros
             state = np.concatenate([
                 self.partition_values,
-                np.zeros((self.CUT_NUM // 2 + 1) ** 2, dtype=np.float32)
+                np.zeros((self.CUT_NUM // 2 + 1) ** 2, dtype=np.float32),
+                [0.0]
             ])

             # If fewer than 4 steps are complete we are still in the partition phase: no reward, done is False
@@ -164,7 +167,8 @@ class PartitionMazeEnv(gym.Env):
             max_regions = (self.CUT_NUM // 2 + 1) ** 2
             state = np.concatenate([
                 self.partition_values,
-                np.zeros(max_regions, dtype=np.float32)
+                np.zeros(max_regions, dtype=np.float32),
+                [0.0]
             ])
             return state, reward, True, False, {}
         else:
@@ -209,7 +213,7 @@ class PartitionMazeEnv(gym.Env):
             for i in range(idx + 1, max_regions):
                 visit_status[i] = 100
             state = np.concatenate(
-                [self.partition_values, visit_status])
+                [self.partition_values, visit_status, [0.0]])
             return state, reward, False, False, {}

         elif self.phase == 2:
@@ -276,9 +280,14 @@ class PartitionMazeEnv(gym.Env):
                 # Start of a new round: initialize the movement flags
                 self.cars_moved = [False] * self.num_cars
             self.cars_moved[current_car] = car_moved
+            # Compute the current T value
+            current_T = max([self._compute_motorcade_time(idx)
+                             for idx in range(self.num_cars)])
             # If a round has ended, check whether no car moved at all
             if self.current_car_index == (self.num_cars - 1) and not any(self.cars_moved):
-                reward -= 0.01
+                # Add time BASE_LINE / T * 10
+                self.delay_time += self.BASE_LINE * (1 / self.MAX_STEPS)
+            real_T = current_T + self.delay_time

             self.step_count += 1
             self.current_car_index = (
@@ -297,18 +306,21 @@ class PartitionMazeEnv(gym.Env):
                         self.rectangles[(i, j)]['is_visited'])
             for i in range(idx + 1, max_regions):
                 visit_status[i] = 100
-            state = np.concatenate([self.partition_values, visit_status])
+            # Append one dimension at the end of the state vector for the current T value
+            state = np.concatenate(
+                [self.partition_values, visit_status, [real_T]])

             # Episode termination: all grids visited or the step limit is reached
             done = all([value['is_visited'] for _, value in self.rectangles.items()]) or (
                 self.step_count >= self.MAX_STEPS)
             if done and all([value['is_visited'] for _, value in self.rectangles.items()]):
-                # Coverage complete: compute each motorcade's execution time from its trajectory
-                T = max([self._compute_motorcade_time(idx)
-                         for idx in range(self.num_cars)])
-                # TODO make the reward vary more sharply around the baseline
-                # reward = math.exp(-T / self.BASE_LINE) * 1000
-                reward += self.BASE_LINE / T
+                # # Coverage complete: compute each motorcade's execution time from its trajectory
+                # T = max([self._compute_motorcade_time(idx)
+                #          for idx in range(self.num_cars)])
+                # # TODO make the reward vary more sharply around the baseline
+                # # reward = math.exp(-T / self.BASE_LINE) * 1000
+                reward += self.BASE_LINE / real_T * 5
+                print(real_T, "="*20)

                 # if reward > self.BASE_LINE:
                 #     reward -= 200
@@ -316,7 +328,7 @@ class PartitionMazeEnv(gym.Env):
             #     reward -= 10 * self.step_count
             # TODO dynamically adjust the baseline
         elif done and self.step_count >= self.MAX_STEPS:
-            reward += -0.8
+            reward += -5

         return state, reward, done, False, {}
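Read on its own, the shaping added here is simple: every full round in which no car moves accrues BASE_LINE / MAX_STEPS of artificial delay, and the terminal reward divides the baseline by the delay-inflated completion time. A minimal sketch of that arithmetic with the constants from the diff (shaped_reward is a hypothetical helper for illustration, not a function in env.py):

BASE_LINE = 3500  # baseline time, as set in env.py
MAX_STEPS = 10    # step limit, as set in env.py

def shaped_reward(current_T, stalled_rounds):
    # Each round in which no car moved adds BASE_LINE / MAX_STEPS to the delay,
    # mirroring the incremental `self.delay_time +=` update in the diff.
    delay_time = stalled_rounds * BASE_LINE * (1 / MAX_STEPS)
    real_T = current_T + delay_time
    # Terminal reward once every region is covered: BASE_LINE / real_T * 5.
    return BASE_LINE / real_T * 5, real_T

print(shaped_reward(3500, 0))  # (5.0, 3500.0): on-baseline finish, no stalls
print(shaped_reward(3500, 2))  # (about 4.17, 4200.0): two stalled rounds

A stall-free run that matches the baseline scores exactly 5; each idle round inflates real_T by 350 and pulls the reward down, so idling is strictly worse than moving, which the old flat reward -= 0.01 penalty did not express as strongly.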
@@ -1,13 +1,13 @@
-# from env import PartitionMazeEnv
-from env_dis import PartitionMazeEnv
+from env import PartitionMazeEnv
+# from env_dis import PartitionMazeEnv

 env = PartitionMazeEnv()

 state = env.reset()
 print(state)

-# action_series = [[0], [0], [0.4], [0], [0.1]]
-action_series = [0, 0, 3, 0, 10]
+action_series = [[0.67], [0], [0], [0], [0.7]]
+# action_series = [0, 0, 3, 0, 10]

 for i in range(100):
     action = action_series[i]
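The driver script ends mid-loop in this capture. A sketch of one plausible continuation, unpacking the five-value tuple that env.step returns in the diff above; the no-op fallback action and the early break are assumptions, not captured code:

from env import PartitionMazeEnv  # assumes env.py is importable from this directory

env = PartitionMazeEnv()
state = env.reset()  # this env's reset returns the state array directly
print(state)

action_series = [[0.67], [0], [0], [0], [0.7]]

for i in range(100):
    # Replay the scripted actions, then fall back to a no-op so the loop
    # does not index past the five-element list.
    action = action_series[i] if i < len(action_series) else [0]
    state, reward, done, truncated, info = env.step(action)
    print(reward, done)
    if done:
        break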