2025-03-11 16:01:07 +08:00
|
|
|
|
import gymnasium as gym
|
|
|
|
|
from gymnasium import spaces
|
|
|
|
|
import numpy as np
|
2025-03-11 19:43:04 +08:00
|
|
|
|
import yaml
|
|
|
|
|
import math
|
2025-03-11 16:01:07 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class PartitionMazeEnv(gym.Env):
    """Two-stage environment: partition a rectangular area, then route cars.

    Phase 0 (partitioning): ``CUT_NUM`` steps, each consuming one scalar
    action.  The first half of the values are vertical (column) cuts, the
    second half horizontal (row) cuts.  Each action is discretised to
    {0, 0.1, ..., 0.9}; a value of 0 means "no cut".

    Phase 1 (initialisation): happens inside the last partition step.  The
    regions closest to the centre of the area are handed to the cars as their
    starting cells.

    Phase 2 (maze): the cars take turns moving up / down / left / right (or
    staying) on the grid of regions until every region has been visited or
    ``MAX_STEPS`` moves have been made.

    Observation layout (length ``CUT_NUM + max_regions + 1``):
    ``CUT_NUM`` partition values, then one visit flag per region slot
    (0 = unvisited, 1 = visited, 100 = slot not used by the current
    partition), then the current elapsed time (0 during phase 0).
    """

    # (exclusive upper bound on the action, (row delta, column delta)).
    # Actions at or above the last bound mean "stay".
    _MOVES = (
        (0.2, (-1, 0)),  # up
        (0.4, (1, 0)),   # down
        (0.6, (0, -1)),  # left
        (0.8, (0, 1)),   # right
    )

    def __init__(self, config=None):
        """Load the fleet parameters from ``params.yml`` and build the spaces.

        Args:
            config: Currently unused.
                NOTE(review): parameters are always read from ``params.yml``
                in the current working directory - confirm whether ``config``
                was meant to override them.
        """
        super().__init__()

        # Fleet parameters.
        with open('params.yml', 'r', encoding='utf-8') as file:
            params = yaml.safe_load(file)

        self.H = params['H']
        self.W = params['W']
        self.num_cars = params['num_cars']

        self.flight_time_factor = params['flight_time_factor']
        self.comp_time_factor = params['comp_time_factor']
        self.trans_time_factor = params['trans_time_factor']
        self.car_time_factor = params['car_time_factor']
        self.bs_time_factor = params['bs_time_factor']

        self.flight_energy_factor = params['flight_energy_factor']
        self.comp_energy_factor = params['comp_energy_factor']
        self.trans_energy_factor = params['trans_energy_factor']
        self.battery_energy_capacity = params['battery_energy_capacity']

        ##############################
        # Hyper-parameters that may need manual tuning
        ##############################
        self.CUT_NUM = 4  # half of the cuts are vertical, half horizontal
        self.BASE_LINE = 3500  # baseline time, obtained via greedy or Monte Carlo
        self.MAX_STEPS = 10  # upper bound on the number of maze moves

        # Phase control: 0 = partitioning, 1 = maze initialisation, 2 = maze.
        self.phase = 0
        self.partition_step = 0  # number of partition decisions taken so far
        self.partition_values = np.zeros(self.CUT_NUM, dtype=np.float32)

        # Every action, in every phase, is a single continuous value in [0, 1].
        self.action_space = spaces.Box(
            low=0.0, high=1.0, shape=(1,), dtype=np.float32)

        # NOTE(review): the last observation element carries the elapsed time,
        # which may exceed the declared upper bound of 100 - confirm whether
        # the bound matters to the training code.
        self.observation_space = spaces.Box(
            low=0.0, high=100.0,
            shape=(self.CUT_NUM + self._max_regions() + 1,),
            dtype=np.float32)

        # Partition-phase state: cut positions as fractions, including the
        # outer borders 0 and 1 once the partition has been finalised.
        self.col_cuts = []
        self.row_cuts = []

        self.init_maze_step = 0

        # Maze-phase state.
        self.region_centers = []
        self.step_count = 0
        self.rectangles = {}
        self.reverse_rectangles = {}
        self.car_pos = [(self.H / 2, self.W / 2) for _ in range(self.num_cars)]
        self.car_traj = [[] for _ in range(self.num_cars)]
        self.cars_moved = [False] * self.num_cars
        self.current_car_index = 0
        self.delay_time = 0

    def reset(self, seed=None, options=None):
        """Reset every variable and return to the partition phase.

        Args:
            seed: Forwarded to ``gym.Env.reset``.
            options: Unused.

        Returns:
            The initial observation (all zeros).
            NOTE(review): Gymnasium's ``Env.reset`` contract is
            ``(observation, info)``; the bare observation is kept here so
            existing callers keep working - confirm before changing.
        """
        super().reset(seed=seed)

        self.phase = 0
        self.partition_step = 0
        self.partition_values = np.zeros(self.CUT_NUM, dtype=np.float32)
        self.col_cuts = []
        self.row_cuts = []
        self.init_maze_step = 0
        self.region_centers = []
        self.step_count = 0
        self.rectangles = {}
        self.reverse_rectangles = {}
        self.car_pos = [(self.H / 2, self.W / 2) for _ in range(self.num_cars)]
        self.car_traj = [[] for _ in range(self.num_cars)]
        self.cars_moved = [False] * self.num_cars
        self.current_car_index = 0
        self.delay_time = 0

        return self._build_state()

    def step(self, action):
        """Advance the environment by one decision.

        Args:
            action: Indexable whose first element is the scalar action.

        Returns:
            ``(state, reward, terminated, truncated, info)``; ``truncated`` is
            always ``False`` and ``info`` is always empty.

        Raises:
            RuntimeError: If the environment is in neither the partition
                phase (0) nor the maze phase (2).
        """
        a = float(action[0])

        if self.phase == 0:
            return self._step_partition(a)
        if self.phase == 2:
            return self._step_maze(a)
        raise RuntimeError(
            f"step() called in unsupported phase {self.phase}")

    def _max_regions(self):
        """Return the largest number of regions a partition can produce."""
        return (self.CUT_NUM // 2 + 1) ** 2

    def _build_state(self, visit_status=None, elapsed=0.0):
        """Assemble the observation vector as float32.

        Args:
            visit_status: Per-slot visit flags; all zeros when omitted.
            elapsed: Value stored in the final (time) element.
        """
        if visit_status is None:
            visit_status = np.zeros(self._max_regions(), dtype=np.float32)
        # The cast keeps the observation in the dtype declared by
        # ``observation_space``; concatenating with a Python list would
        # otherwise promote the result to float64.
        return np.concatenate(
            [self.partition_values, visit_status, [elapsed]]
        ).astype(np.float32)

    def _visit_status(self):
        """Return the visit flags, padding unused region slots with 100."""
        n_rows = len(self.row_cuts) - 1
        n_cols = len(self.col_cuts) - 1
        status = np.zeros(self._max_regions(), dtype=np.float32)
        for i in range(n_rows):
            for j in range(n_cols):
                status[i * n_cols + j] = float(
                    self.rectangles[(i, j)]['is_visited'])
        status[n_rows * n_cols:] = 100
        return status

    def _step_partition(self, a):
        """Handle one partition-phase action (phase 0)."""
        # Discretise the action to {0, 0.1, ..., 0.9}.
        disc_val = np.clip(np.floor(a * 10) / 10.0, 0.0, 0.9)
        self.partition_values[self.partition_step] = disc_val
        self.partition_step += 1

        # Still collecting cut positions: no reward, episode continues.
        if self.partition_step < self.CUT_NUM:
            return self._build_state(), 0.0, False, False, {}

        # All cuts chosen: an infeasible partition ends the episode.
        if not self._build_rectangles():
            return self._build_state(), -10, True, False, {}

        # Phase 1: initialise the maze, then move straight on to phase 2.
        self.phase = 1
        self._place_cars()
        self.phase = 2

        return self._build_state(self._visit_status()), 0.2, False, False, {}

    def _build_rectangles(self):
        """Derive the cut boundaries and fill ``self.rectangles``.

        Returns:
            ``True`` if every region has a non-negative energy-limited
            offloading ratio, ``False`` as soon as one region does not.
        """
        half = len(self.partition_values) // 2
        # Drop zeros ("no cut"), de-duplicate and sort.
        vertical_cuts = sorted(
            {v for v in self.partition_values[:half] if v > 0})
        horizontal_cuts = sorted(
            {v for v in self.partition_values[half:] if v > 0})

        # The outer borders 0 and 1 are always part of the boundaries.
        self.col_cuts = [0.0] + vertical_cuts + [1.0]
        self.row_cuts = [0.0] + horizontal_cuts + [1.0]

        # Depends on the parameters only, so it is computed once.
        rho_time_limit = (self.flight_time_factor - self.trans_time_factor) / \
            (self.comp_time_factor - self.trans_time_factor)

        for i in range(len(self.row_cuts) - 1):
            for j in range(len(self.col_cuts) - 1):
                # Area of region (i, j).
                d = (self.col_cuts[j + 1] - self.col_cuts[j]) * self.W * \
                    (self.row_cuts[i + 1] - self.row_cuts[i]) * self.H
                rho_energy_limit = (
                    self.battery_energy_capacity
                    - self.flight_energy_factor * d
                    - self.trans_energy_factor * d
                ) / (self.comp_energy_factor * d - self.trans_energy_factor * d)
                if rho_energy_limit < 0:
                    return False
                rho = min(rho_time_limit, rho_energy_limit)

                self.rectangles[(i, j)] = {
                    'center': (
                        (self.row_cuts[i] + self.row_cuts[i + 1]) * self.H / 2,
                        (self.col_cuts[j + 1] + self.col_cuts[j]) * self.W / 2,
                    ),
                    'flight_time': self.flight_time_factor * d,
                    'bs_time': self.bs_time_factor * (1 - rho) * d,
                    'is_visited': False,
                }
        return True

    def _place_cars(self):
        """Put each car on one of the regions nearest to the area centre."""
        # Reverse index (centre -> grid cell) used to locate cars later.
        self.reverse_rectangles = {
            v['center']: k for k, v in self.rectangles.items()}

        region_centers = [
            (i, j, self.rectangles[(i, j)]['center'])
            for i in range(len(self.row_cuts) - 1)
            for j in range(len(self.col_cuts) - 1)
        ]
        # Nearest to the area centre first.
        region_centers.sort(
            key=lambda x: math.dist(x[2], (self.H / 2, self.W / 2))
        )

        for idx in range(self.num_cars):
            # Wrap around when there are fewer regions than cars so that the
            # extra cars share a starting region instead of raising
            # IndexError.
            i, j, center = region_centers[idx % len(region_centers)]
            self.car_pos[idx] = center
            self.car_traj[idx].append((i, j))
            self.rectangles[(i, j)]['is_visited'] = True

    def _step_maze(self, a):
        """Handle one maze-phase action (phase 2) for the current car."""
        current_car = self.current_car_index
        # Look up the grid cell the current car is standing on.
        current_row, current_col = self.reverse_rectangles[
            self.car_pos[current_car]]

        reward = 0

        # Map the scalar action to a direction; anything else means "stay".
        d_row, d_col = next(
            (delta for bound, delta in self._MOVES if a < bound), (0, 0))
        cand_row, cand_col = current_row + d_row, current_col + d_col

        # A move that would leave the grid keeps the car where it is.
        # TODO: consider penalising illegal moves.
        n_rows = len(self.row_cuts) - 1
        n_cols = len(self.col_cuts) - 1
        if 0 <= cand_row < n_rows and 0 <= cand_col < n_cols:
            new_row, new_col = cand_row, cand_col
        else:
            new_row, new_col = current_row, current_col

        car_moved = (new_row != current_row or new_col != current_col)

        # Update the car position and, if it moved, its trajectory.
        self.car_pos[current_car] = self.rectangles[
            (new_row, new_col)]['center']
        if car_moved:
            self.car_traj[current_car].append((new_row, new_col))
        self.rectangles[(new_row, new_col)]['is_visited'] = True

        # Track whether any car moved during the current round.
        if self.current_car_index == 0:
            # A new round starts with the first car.
            self.cars_moved = [False] * self.num_cars
        self.cars_moved[current_car] = car_moved

        # Slowest car determines the current completion time.
        current_T = max(self._compute_motorcade_time(idx)
                        for idx in range(self.num_cars))
        # A full round in which no car moved adds BASE_LINE / MAX_STEPS of
        # delay.
        if (self.current_car_index == (self.num_cars - 1)
                and not any(self.cars_moved)):
            self.delay_time += self.BASE_LINE * (1 / self.MAX_STEPS)
        real_T = current_T + self.delay_time

        # step_count counts individual car moves, not rounds.
        self.step_count += 1
        self.current_car_index = (
            self.current_car_index + 1) % self.num_cars

        # The last observation element carries the current time.
        state = self._build_state(self._visit_status(), real_T)

        # The episode ends when every region is visited or the step budget
        # is exhausted.
        all_visited = all(
            value['is_visited'] for value in self.rectangles.values())
        done = all_visited or (self.step_count >= self.MAX_STEPS)
        if all_visited:
            # TODO: make the reward vary more sharply around the baseline.
            # TODO: adjust the baseline dynamically.
            reward += self.BASE_LINE / real_T * 5
            print(real_T, "="*20)
        elif done:
            reward += -5

        return state, reward, done, False, {}

    def _compute_motorcade_time(self, idx):
        """Return the completion time of car ``idx`` for its trajectory.

        The result is the larger of (driving time + summed flight time) and
        the summed ``bs_time`` over the visited cells.  Driving time covers
        the leg from the area centre to the first cell, every leg between
        consecutive cells, and the leg from the last cell back to the area
        centre.  Cells that appear more than once in the trajectory are
        counted each time.

        Returns 0.0 for a car with an empty trajectory.
        """
        traj = self.car_traj[idx]
        if not traj:
            return 0.0

        flight_time = sum(self.rectangles[tuple(point)]['flight_time']
                          for point in traj)
        bs_time = sum(self.rectangles[tuple(point)]['bs_time']
                      for point in traj)

        car_time = 0
        for first_point, second_point in zip(traj, traj[1:]):
            car_time += math.dist(
                self.rectangles[first_point]['center'],
                self.rectangles[second_point]['center'],
            ) * self.car_time_factor

        # Legs between the area centre and the first / last visited cell.
        area_center = [self.H / 2, self.W / 2]
        car_time += math.dist(
            self.rectangles[traj[0]]['center'], area_center
        ) * self.car_time_factor
        car_time += math.dist(
            self.rectangles[traj[-1]]['center'], area_center
        ) * self.car_time_factor

        return max(float(car_time) + flight_time, bs_time)

    def render(self):
        """Print a short textual summary for phase 1 or phase 2."""
        if self.phase == 1:
            print("Phase 1: Initialize maze environment.")
            print(f"Partition values so far: {self.partition_values}")
            print(f"Motorcade positon: {self.car_pos}")
        elif self.phase == 2:
            print("Phase 2: Play maze.")
            print(f'Motorcade trajectory: {self.car_traj}')