环境增加delay_time
This commit is contained in:
parent
5b468deb9d
commit
a9ee5ceec7
44
env.py
44
env.py
@ -41,7 +41,7 @@ class PartitionMazeEnv(gym.Env):
|
||||
##############################
|
||||
self.CUT_NUM = 4 # 横切一半,竖切一半
|
||||
self.BASE_LINE = 3500 # 基准时间,通过greedy或者蒙特卡洛计算出来
|
||||
self.MAX_STEPS = 50 # 迷宫走法步数上限
|
||||
self.MAX_STEPS = 10 # 迷宫走法步数上限
|
||||
|
||||
self.phase = 0 # 阶段控制,0:区域划分阶段,1:迷宫初始化阶段,2:走迷宫阶段
|
||||
self.partition_step = 0 # 区域划分阶段步数,范围 0~4
|
||||
@ -57,7 +57,7 @@ class PartitionMazeEnv(gym.Env):
|
||||
# 阶段 1 状态:区域访问状态向量(长度为(CUT_NUM/2+1)^2)
|
||||
max_regions = (self.CUT_NUM // 2 + 1) ** 2
|
||||
self.observation_space = spaces.Box(
|
||||
low=0.0, high=100.0, shape=(self.CUT_NUM + max_regions,), dtype=np.float32)
|
||||
low=0.0, high=100.0, shape=(self.CUT_NUM + max_regions + 1,), dtype=np.float32)
|
||||
|
||||
# 切分阶段相关变量
|
||||
self.col_cuts = [] # 存储竖切位置(c₁, c₂),当值为0时表示不切
|
||||
@ -71,7 +71,7 @@ class PartitionMazeEnv(gym.Env):
|
||||
self.car_pos = [(self.H / 2, self.W / 2) for _ in range(self.num_cars)]
|
||||
self.car_traj = [[] for _ in range(self.num_cars)]
|
||||
self.current_car_index = 0
|
||||
self.previous_T = 0
|
||||
self.delay_time = 0
|
||||
|
||||
def reset(self, seed=None, options=None):
|
||||
# 重置所有变量,回到切分阶段(phase 0)
|
||||
@ -87,12 +87,14 @@ class PartitionMazeEnv(gym.Env):
|
||||
self.car_pos = [(self.H / 2, self.W / 2) for _ in range(self.num_cars)]
|
||||
self.car_traj = [[] for _ in range(self.num_cars)]
|
||||
self.current_car_index = 0
|
||||
self.delay_time = 0
|
||||
|
||||
# State: the first 4 dimensions are partition_values, followed by the region visit status (all 0 initially), plus one trailing 0.0 element
|
||||
max_regions = (self.CUT_NUM // 2 + 1) ** 2
|
||||
state = np.concatenate([
|
||||
self.partition_values,
|
||||
np.zeros(max_regions, dtype=np.float32)
|
||||
np.zeros(max_regions, dtype=np.float32),
|
||||
[0.0]
|
||||
])
|
||||
return state
|
||||
|
||||
@ -110,7 +112,8 @@ class PartitionMazeEnv(gym.Env):
|
||||
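# Build the current state: the first partition_step entries are the decided values and the rest are 0, then pad (CUT_NUM // 2 + 1) ** 2 zeros plus one trailing 0.0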
|
||||
state = np.concatenate([
|
||||
self.partition_values,
|
||||
np.zeros((self.CUT_NUM // 2 + 1) ** 2, dtype=np.float32)
|
||||
np.zeros((self.CUT_NUM // 2 + 1) ** 2, dtype=np.float32),
|
||||
[0.0]
|
||||
])
|
||||
|
||||
# 如果未完成 4 步,则仍处于切分阶段,不发奖励,done 为 False
|
||||
@ -164,7 +167,8 @@ class PartitionMazeEnv(gym.Env):
|
||||
max_regions = (self.CUT_NUM // 2 + 1) ** 2
|
||||
state = np.concatenate([
|
||||
self.partition_values,
|
||||
np.zeros(max_regions, dtype=np.float32)
|
||||
np.zeros(max_regions, dtype=np.float32),
|
||||
[0.0]
|
||||
])
|
||||
return state, reward, True, False, {}
|
||||
else:
|
||||
@ -209,7 +213,7 @@ class PartitionMazeEnv(gym.Env):
|
||||
for i in range(idx + 1, max_regions):
|
||||
visit_status[i] = 100
|
||||
state = np.concatenate(
|
||||
[self.partition_values, visit_status])
|
||||
[self.partition_values, visit_status, [0.0]])
|
||||
return state, reward, False, False, {}
|
||||
|
||||
elif self.phase == 2:
|
||||
@ -276,9 +280,14 @@ class PartitionMazeEnv(gym.Env):
|
||||
# 新一轮的开始,初始化移动标记
|
||||
self.cars_moved = [False] * self.num_cars
|
||||
self.cars_moved[current_car] = car_moved
|
||||
# 计算当前的 T 值
|
||||
current_T = max([self._compute_motorcade_time(idx)
|
||||
for idx in range(self.num_cars)])
|
||||
# 如果一轮结束,检查是否所有车辆都没有移动
|
||||
if self.current_car_index == (self.num_cars - 1) and not any(self.cars_moved):
|
||||
reward -= 0.01
|
||||
# No car moved during this round: add BASE_LINE / MAX_STEPS to the accumulated delay_time
|
||||
self.delay_time += self.BASE_LINE * (1 / self.MAX_STEPS)
|
||||
real_T = current_T + self.delay_time
|
||||
|
||||
self.step_count += 1
|
||||
self.current_car_index = (
|
||||
@ -297,18 +306,21 @@ class PartitionMazeEnv(gym.Env):
|
||||
self.rectangles[(i, j)]['is_visited'])
|
||||
for i in range(idx + 1, max_regions):
|
||||
visit_status[i] = 100
|
||||
state = np.concatenate([self.partition_values, visit_status])
|
||||
# Append one extra dimension to the state vector: real_T (the current T plus the accumulated delay_time)
|
||||
state = np.concatenate(
|
||||
[self.partition_values, visit_status, [real_T]])
|
||||
|
||||
# Episode 终止条件:所有网格均被访问或步数达到上限
|
||||
done = all([value['is_visited'] for _, value in self.rectangles.items()]) or (
|
||||
self.step_count >= self.MAX_STEPS)
|
||||
if done and all([value['is_visited'] for _, value in self.rectangles.items()]):
|
||||
# 区域覆盖完毕,根据轨迹计算各车队的执行时间
|
||||
T = max([self._compute_motorcade_time(idx)
|
||||
for idx in range(self.num_cars)])
|
||||
# TODO 让奖励在baseline附近变化更剧烈
|
||||
# reward = math.exp(-T / self.BASE_LINE) * 1000
|
||||
reward += self.BASE_LINE / T
|
||||
# # 区域覆盖完毕,根据轨迹计算各车队的执行时间
|
||||
# T = max([self._compute_motorcade_time(idx)
|
||||
# for idx in range(self.num_cars)])
|
||||
# # TODO 让奖励在baseline附近变化更剧烈
|
||||
# # reward = math.exp(-T / self.BASE_LINE) * 1000
|
||||
reward += self.BASE_LINE / real_T * 5
|
||||
print(real_T, "="*20)
|
||||
|
||||
# if reward > self.BASE_LINE:
|
||||
# reward -= 200
|
||||
@ -316,7 +328,7 @@ class PartitionMazeEnv(gym.Env):
|
||||
# reward -= 10 * self.step_count
|
||||
# TODO 动态调整baseline
|
||||
elif done and self.step_count >= self.MAX_STEPS:
|
||||
reward += -0.8
|
||||
reward += -5
|
||||
|
||||
return state, reward, done, False, {}
|
||||
|
||||
|
@ -1,13 +1,13 @@
|
||||
# from env import PartitionMazeEnv
|
||||
from env_dis import PartitionMazeEnv
|
||||
from env import PartitionMazeEnv
|
||||
# from env_dis import PartitionMazeEnv
|
||||
|
||||
env = PartitionMazeEnv()
|
||||
|
||||
state = env.reset()
|
||||
print(state)
|
||||
|
||||
# action_series = [[0], [0], [0.4], [0], [0.1]]
|
||||
action_series = [0, 0, 3, 0, 10]
|
||||
action_series = [[0.67], [0], [0], [0], [0.7]]
|
||||
# action_series = [0, 0, 3, 0, 10]
|
||||
|
||||
for i in range(100):
|
||||
action = action_series[i]
|
||||
|
Loading…
Reference in New Issue
Block a user