diff --git a/env.py b/env.py index 4dd4d68..4f50b7b 100644 --- a/env.py +++ b/env.py @@ -38,9 +38,9 @@ class PartitionMazeEnv(gym.Env): self.phase = 0 # 阶段控制,0:区域划分阶段,1:迷宫初始化阶段,2:走迷宫阶段 self.partition_step = 0 # 区域划分阶段步数,范围 0~4 - # TODO 切的刀数现在固定为4(2+2) + self.CUT_NUM = 6 # 横切一半,竖切一半 self.partition_values = np.zeros( - 4, dtype=np.float32) # 存储 c₁, c₂, r₁, r₂ + self.CUT_NUM, dtype=np.float32) # 存储 c₁, c₂, r₁, r₂ # 定义动作空间:全部动作均为 1 维连续 [0,1] self.action_space = spaces.Box( @@ -51,7 +51,7 @@ class PartitionMazeEnv(gym.Env): # 阶段 0 状态:前 4 维表示已决策的切分值(未决策部分为 0) # 阶段 1 状态:车辆位置 (2D) self.observation_space = spaces.Box( - low=0.0, high=1.0, shape=(4 + 2 * self.num_cars,), dtype=np.float32) + low=0.0, high=1.0, shape=(self.CUT_NUM + 2 * self.num_cars,), dtype=np.float32) # 切分阶段相关变量 self.col_cuts = [] # 存储竖切位置(c₁, c₂),当值为0时表示不切 @@ -72,7 +72,7 @@ class PartitionMazeEnv(gym.Env): # 重置所有变量,回到切分阶段(phase 0) self.phase = 0 self.partition_step = 0 - self.partition_values = np.zeros(4, dtype=np.float32) + self.partition_values = np.zeros(self.CUT_NUM, dtype=np.float32) self.col_cuts = [] self.row_cuts = [] self.init_maze_step = 0 @@ -103,7 +103,7 @@ class PartitionMazeEnv(gym.Env): [self.partition_values, np.zeros(np.array(self.car_pos).flatten().shape[0], dtype=np.float32)]) # 如果未完成 4 步,则仍处于切分阶段,不发奖励,done 为 False - if self.partition_step < 4: + if self.partition_step < self.CUT_NUM: return state, 0.0, False, False, {} else: # 完成 4 步后,计算切分边界 diff --git a/mtkl_sovler.py b/mtkl_sovler.py index 102b595..cf67735 100644 --- a/mtkl_sovler.py +++ b/mtkl_sovler.py @@ -2,11 +2,12 @@ import random import math import yaml import json +import numpy as np # 固定随机种子,便于复现 random.seed(42) -num_iterations = 1000000 +num_iterations = 10000 # --------------------------- # 参数设置 @@ -37,14 +38,21 @@ best_solution = None for iteration in range(num_iterations): # 随机生成分区的行分段数与列分段数 - R = random.randint(1, 5) # 行分段数 - C = random.randint(1, 5) # 列分段数 + R = random.randint(0, 5) # 行分段数 + C = random.randint(0, 5) # 列分段数 # 生成随机的行、列分割边界 - row_boundaries = sorted(random.sample(range(1, H), R - 1)) - row_boundaries = [0] + row_boundaries + [H] - col_boundaries = sorted(random.sample(range(1, W), C - 1)) - col_boundaries = [0] + col_boundaries + [W] + horiz = [np.clip(np.floor(random.random() * 10) /10, 0.0, 0.9) for _ in range(R)] + horiz = sorted(set(horiz)) + horiz = horiz if horiz else [] + row_boundaries = [0] + horiz + [1] + row_boundaries = [boundary * H for boundary in row_boundaries] + + vert = [np.clip(np.floor(random.random() * 10) /10, 0.0, 0.9) for _ in range(C)] + vert = sorted(set(vert)) + vert = vert if vert else [] + col_boundaries = [0] + vert + [1] + col_boundaries = [boundary * W for boundary in col_boundaries] # --------------------------- # 根据分割边界生成所有矩形任务 @@ -116,12 +124,14 @@ for iteration in range(num_iterations): total_flight_time = sum(task['flight_time'] for task in tasks) if tasks: # 车辆从区域中心到第一个任务中心 - car_time = math.dist(tasks[0]['center'], region_center) * car_time_factor + car_time = math.dist(tasks[0]['center'], + region_center) * car_time_factor # 依次经过任务中心 for j in range(len(tasks) - 1): prev_center = tasks[j]['center'] curr_center = tasks[j + 1]['center'] - car_time += math.dist(curr_center, prev_center) * car_time_factor + car_time += math.dist(curr_center, + prev_center) * car_time_factor # 回到区域中心 car_time += math.dist(region_center, curr_center) * car_time_factor else: @@ -167,18 +177,21 @@ if best_solution is not None: car_paths = {} for i in range(k): num_tasks = len(best_solution['system_tasks'][i]) - print(f"系统 {i}: 完成时间 T = {best_solution['T_k_list'][i]}, 飞行任务数量: {num_tasks}") + print( + f"系统 {i}: 完成时间 T = {best_solution['T_k_list'][i]}, 飞行任务数量: {num_tasks}") tasks = best_solution['system_tasks'][i] tasks.sort(key=lambda r: math.hypot(r['center'][0] - region_center[0], r['center'][1] - region_center[1])) if tasks: - print(f"轨迹路线: 区域中心({region_center[0]:.1f}, {region_center[1]:.1f})", end="") + print( + f"轨迹路线: 区域中心({region_center[0]:.1f}, {region_center[1]:.1f})", end="") current_pos = region_center car_path = [] for j, task in enumerate(tasks, 1): current_pos = task['center'] car_path.append(current_pos) - print(f" -> 任务{j}({current_pos[0]:.1f}, {current_pos[1]:.1f})", end="") + print( + f" -> 任务{j}({current_pos[0]:.1f}, {current_pos[1]:.1f})", end="") print(" -> 区域中心") car_paths[i] = car_path