修改蒙特卡洛采样法

2025-03-14 11:01:02 +08:00 · 2025-03-14 11:01:02 +08:00 · dfec68e122
commit dfec68e122
parent b3b5e597b8
2 changed files with 30 additions and 17 deletions
--- a/env.py
+++ b/env.py
@ -38,9 +38,9 @@ class PartitionMazeEnv(gym.Env):

        self.phase = 0    # 阶段控制，0：区域划分阶段，1：迷宫初始化阶段，2：走迷宫阶段
        self.partition_step = 0      # 区域划分阶段步数，范围 0~4
-        # TODO 切的刀数现在固定为4（2+2）
+        self.CUT_NUM = 6    # 横切一半，竖切一半
        self.partition_values = np.zeros(
-            4, dtype=np.float32)  # 存储 c₁, c₂, r₁, r₂
+            self.CUT_NUM, dtype=np.float32)  # 存储全部 CUT_NUM 个切分值（未决策部分为 0）

        # 定义动作空间：全部动作均为 1 维连续 [0,1]
        self.action_space = spaces.Box(
@ -51,7 +51,7 @@ class PartitionMazeEnv(gym.Env):
        # 阶段 0 状态：前 4 维表示已决策的切分值（未决策部分为 0）
        # 阶段 1 状态：车辆位置 (2D)
        self.observation_space = spaces.Box(
-            low=0.0, high=1.0, shape=(4 + 2 * self.num_cars,), dtype=np.float32)
+            low=0.0, high=1.0, shape=(self.CUT_NUM + 2 * self.num_cars,), dtype=np.float32)

        # 切分阶段相关变量
        self.col_cuts = []     # 存储竖切位置（c₁, c₂），当值为0时表示不切
@ -72,7 +72,7 @@ class PartitionMazeEnv(gym.Env):
        # 重置所有变量，回到切分阶段（phase 0）
        self.phase = 0
        self.partition_step = 0
-        self.partition_values = np.zeros(4, dtype=np.float32)
+        self.partition_values = np.zeros(self.CUT_NUM, dtype=np.float32)
        self.col_cuts = []
        self.row_cuts = []
        self.init_maze_step = 0
@ -103,7 +103,7 @@ class PartitionMazeEnv(gym.Env):
                [self.partition_values, np.zeros(np.array(self.car_pos).flatten().shape[0], dtype=np.float32)])

            # 如果未完成 4 步，则仍处于切分阶段，不发奖励，done 为 False
-            if self.partition_step < 4:
+            if self.partition_step < self.CUT_NUM:
                return state, 0.0, False, False, {}
            else:
                # 完成 4 步后，计算切分边界
--- a/mtkl_sovler.py
+++ b/mtkl_sovler.py
@ -2,11 +2,12 @@ import random
 import math
 import yaml
 import json
+import numpy as np

 # 固定随机种子，便于复现
 random.seed(42)

-num_iterations = 1000000
+num_iterations = 10000

 # ---------------------------
 # 参数设置
@ -37,14 +38,21 @@ best_solution = None

 for iteration in range(num_iterations):
    # 随机生成分区的行分段数与列分段数
-    R = random.randint(1, 5)  # 行分段数
-    C = random.randint(1, 5)  # 列分段数
+    R = random.randint(0, 5)  # 横向切点的采样个数（去重后实际切点数可能更少）
+    C = random.randint(0, 5)  # 纵向切点的采样个数（去重后实际切点数可能更少）

    # 生成随机的行、列分割边界
-    row_boundaries = sorted(random.sample(range(1, H), R - 1))
-    row_boundaries = [0] + row_boundaries + [H]
-    col_boundaries = sorted(random.sample(range(1, W), C - 1))
-    col_boundaries = [0] + col_boundaries + [W]
+    horiz = [np.clip(np.floor(random.random() * 10) /10, 0.0, 0.9) for _ in range(R)]
+    horiz = sorted(set(horiz))
+    horiz = horiz if horiz else []
+    row_boundaries = [0] + horiz + [1]
+    row_boundaries = [boundary * H for boundary in row_boundaries]
+
+    vert = [np.clip(np.floor(random.random() * 10) /10, 0.0, 0.9) for _ in range(C)]
+    vert = sorted(set(vert))
+    vert = vert if vert else []
+    col_boundaries = [0] + vert + [1]
+    col_boundaries = [boundary * W for boundary in col_boundaries]

    # ---------------------------
    # 根据分割边界生成所有矩形任务
@ -116,12 +124,14 @@ for iteration in range(num_iterations):
        total_flight_time = sum(task['flight_time'] for task in tasks)
        if tasks:
            # 车辆从区域中心到第一个任务中心
-            car_time = math.dist(tasks[0]['center'], region_center) * car_time_factor
+            car_time = math.dist(tasks[0]['center'],
+                                 region_center) * car_time_factor
            # 依次经过任务中心
            for j in range(len(tasks) - 1):
                prev_center = tasks[j]['center']
                curr_center = tasks[j + 1]['center']
-                car_time += math.dist(curr_center, prev_center) * car_time_factor
+                car_time += math.dist(curr_center,
+                                      prev_center) * car_time_factor
            # 回到区域中心
            car_time += math.dist(region_center, curr_center) * car_time_factor
        else:
@ -167,18 +177,21 @@ if best_solution is not None:
    car_paths = {}
    for i in range(k):
        num_tasks = len(best_solution['system_tasks'][i])
-        print(f"系统 {i}: 完成时间 T = {best_solution['T_k_list'][i]}, 飞行任务数量: {num_tasks}")
+        print(
+            f"系统 {i}: 完成时间 T = {best_solution['T_k_list'][i]}, 飞行任务数量: {num_tasks}")
        tasks = best_solution['system_tasks'][i]
        tasks.sort(key=lambda r: math.hypot(r['center'][0] - region_center[0],
                                            r['center'][1] - region_center[1]))
        if tasks:
-            print(f"轨迹路线: 区域中心({region_center[0]:.1f}, {region_center[1]:.1f})", end="")
+            print(
+                f"轨迹路线: 区域中心({region_center[0]:.1f}, {region_center[1]:.1f})", end="")
            current_pos = region_center
            car_path = []
            for j, task in enumerate(tasks, 1):
                current_pos = task['center']
                car_path.append(current_pos)
-                print(f" -> 任务{j}({current_pos[0]:.1f}, {current_pos[1]:.1f})", end="")
+                print(
+                    f" -> 任务{j}({current_pos[0]:.1f}, {current_pos[1]:.1f})", end="")
            print(" -> 区域中心")
            car_paths[i] = car_path