修改蒙特卡洛采样法
This commit is contained in:
parent
b3b5e597b8
commit
dfec68e122
10
env.py
10
env.py
@ -38,9 +38,9 @@ class PartitionMazeEnv(gym.Env):
|
|||||||
|
|
||||||
self.phase = 0 # 阶段控制,0:区域划分阶段,1:迷宫初始化阶段,2:走迷宫阶段
|
self.phase = 0 # 阶段控制,0:区域划分阶段,1:迷宫初始化阶段,2:走迷宫阶段
|
||||||
self.partition_step = 0 # 区域划分阶段步数,范围 0~4
|
self.partition_step = 0 # 区域划分阶段步数,范围 0~4
|
||||||
# TODO 切的刀数现在固定为4(2+2)
|
self.CUT_NUM = 6 # 横切一半,竖切一半
|
||||||
self.partition_values = np.zeros(
|
self.partition_values = np.zeros(
|
||||||
4, dtype=np.float32) # 存储 c₁, c₂, r₁, r₂
|
self.CUT_NUM, dtype=np.float32) # 存储 c₁, c₂, r₁, r₂
|
||||||
|
|
||||||
# 定义动作空间:全部动作均为 1 维连续 [0,1]
|
# 定义动作空间:全部动作均为 1 维连续 [0,1]
|
||||||
self.action_space = spaces.Box(
|
self.action_space = spaces.Box(
|
||||||
@ -51,7 +51,7 @@ class PartitionMazeEnv(gym.Env):
|
|||||||
# 阶段 0 状态:前 4 维表示已决策的切分值(未决策部分为 0)
|
# 阶段 0 状态:前 4 维表示已决策的切分值(未决策部分为 0)
|
||||||
# 阶段 1 状态:车辆位置 (2D)
|
# 阶段 1 状态:车辆位置 (2D)
|
||||||
self.observation_space = spaces.Box(
|
self.observation_space = spaces.Box(
|
||||||
low=0.0, high=1.0, shape=(4 + 2 * self.num_cars,), dtype=np.float32)
|
low=0.0, high=1.0, shape=(self.CUT_NUM + 2 * self.num_cars,), dtype=np.float32)
|
||||||
|
|
||||||
# 切分阶段相关变量
|
# 切分阶段相关变量
|
||||||
self.col_cuts = [] # 存储竖切位置(c₁, c₂),当值为0时表示不切
|
self.col_cuts = [] # 存储竖切位置(c₁, c₂),当值为0时表示不切
|
||||||
@ -72,7 +72,7 @@ class PartitionMazeEnv(gym.Env):
|
|||||||
# 重置所有变量,回到切分阶段(phase 0)
|
# 重置所有变量,回到切分阶段(phase 0)
|
||||||
self.phase = 0
|
self.phase = 0
|
||||||
self.partition_step = 0
|
self.partition_step = 0
|
||||||
self.partition_values = np.zeros(4, dtype=np.float32)
|
self.partition_values = np.zeros(self.CUT_NUM, dtype=np.float32)
|
||||||
self.col_cuts = []
|
self.col_cuts = []
|
||||||
self.row_cuts = []
|
self.row_cuts = []
|
||||||
self.init_maze_step = 0
|
self.init_maze_step = 0
|
||||||
@ -103,7 +103,7 @@ class PartitionMazeEnv(gym.Env):
|
|||||||
[self.partition_values, np.zeros(np.array(self.car_pos).flatten().shape[0], dtype=np.float32)])
|
[self.partition_values, np.zeros(np.array(self.car_pos).flatten().shape[0], dtype=np.float32)])
|
||||||
|
|
||||||
# 如果未完成 4 步,则仍处于切分阶段,不发奖励,done 为 False
|
# 如果未完成 4 步,则仍处于切分阶段,不发奖励,done 为 False
|
||||||
if self.partition_step < 4:
|
if self.partition_step < self.CUT_NUM:
|
||||||
return state, 0.0, False, False, {}
|
return state, 0.0, False, False, {}
|
||||||
else:
|
else:
|
||||||
# 完成 4 步后,计算切分边界
|
# 完成 4 步后,计算切分边界
|
||||||
|
@ -2,11 +2,12 @@ import random
|
|||||||
import math
|
import math
|
||||||
import yaml
|
import yaml
|
||||||
import json
|
import json
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
# 固定随机种子,便于复现
|
# 固定随机种子,便于复现
|
||||||
random.seed(42)
|
random.seed(42)
|
||||||
|
|
||||||
num_iterations = 1000000
|
num_iterations = 10000
|
||||||
|
|
||||||
# ---------------------------
|
# ---------------------------
|
||||||
# 参数设置
|
# 参数设置
|
||||||
@ -37,14 +38,21 @@ best_solution = None
|
|||||||
|
|
||||||
for iteration in range(num_iterations):
|
for iteration in range(num_iterations):
|
||||||
# 随机生成分区的行分段数与列分段数
|
# 随机生成分区的行分段数与列分段数
|
||||||
R = random.randint(1, 5) # 行分段数
|
R = random.randint(0, 5) # 行分段数
|
||||||
C = random.randint(1, 5) # 列分段数
|
C = random.randint(0, 5) # 列分段数
|
||||||
|
|
||||||
# 生成随机的行、列分割边界
|
# 生成随机的行、列分割边界
|
||||||
row_boundaries = sorted(random.sample(range(1, H), R - 1))
|
horiz = [np.clip(np.floor(random.random() * 10) /10, 0.0, 0.9) for _ in range(R)]
|
||||||
row_boundaries = [0] + row_boundaries + [H]
|
horiz = sorted(set(horiz))
|
||||||
col_boundaries = sorted(random.sample(range(1, W), C - 1))
|
horiz = horiz if horiz else []
|
||||||
col_boundaries = [0] + col_boundaries + [W]
|
row_boundaries = [0] + horiz + [1]
|
||||||
|
row_boundaries = [boundary * H for boundary in row_boundaries]
|
||||||
|
|
||||||
|
vert = [np.clip(np.floor(random.random() * 10) /10, 0.0, 0.9) for _ in range(C)]
|
||||||
|
vert = sorted(set(vert))
|
||||||
|
vert = vert if vert else []
|
||||||
|
col_boundaries = [0] + vert + [1]
|
||||||
|
col_boundaries = [boundary * W for boundary in col_boundaries]
|
||||||
|
|
||||||
# ---------------------------
|
# ---------------------------
|
||||||
# 根据分割边界生成所有矩形任务
|
# 根据分割边界生成所有矩形任务
|
||||||
@ -116,12 +124,14 @@ for iteration in range(num_iterations):
|
|||||||
total_flight_time = sum(task['flight_time'] for task in tasks)
|
total_flight_time = sum(task['flight_time'] for task in tasks)
|
||||||
if tasks:
|
if tasks:
|
||||||
# 车辆从区域中心到第一个任务中心
|
# 车辆从区域中心到第一个任务中心
|
||||||
car_time = math.dist(tasks[0]['center'], region_center) * car_time_factor
|
car_time = math.dist(tasks[0]['center'],
|
||||||
|
region_center) * car_time_factor
|
||||||
# 依次经过任务中心
|
# 依次经过任务中心
|
||||||
for j in range(len(tasks) - 1):
|
for j in range(len(tasks) - 1):
|
||||||
prev_center = tasks[j]['center']
|
prev_center = tasks[j]['center']
|
||||||
curr_center = tasks[j + 1]['center']
|
curr_center = tasks[j + 1]['center']
|
||||||
car_time += math.dist(curr_center, prev_center) * car_time_factor
|
car_time += math.dist(curr_center,
|
||||||
|
prev_center) * car_time_factor
|
||||||
# 回到区域中心
|
# 回到区域中心
|
||||||
car_time += math.dist(region_center, curr_center) * car_time_factor
|
car_time += math.dist(region_center, curr_center) * car_time_factor
|
||||||
else:
|
else:
|
||||||
@ -167,18 +177,21 @@ if best_solution is not None:
|
|||||||
car_paths = {}
|
car_paths = {}
|
||||||
for i in range(k):
|
for i in range(k):
|
||||||
num_tasks = len(best_solution['system_tasks'][i])
|
num_tasks = len(best_solution['system_tasks'][i])
|
||||||
print(f"系统 {i}: 完成时间 T = {best_solution['T_k_list'][i]}, 飞行任务数量: {num_tasks}")
|
print(
|
||||||
|
f"系统 {i}: 完成时间 T = {best_solution['T_k_list'][i]}, 飞行任务数量: {num_tasks}")
|
||||||
tasks = best_solution['system_tasks'][i]
|
tasks = best_solution['system_tasks'][i]
|
||||||
tasks.sort(key=lambda r: math.hypot(r['center'][0] - region_center[0],
|
tasks.sort(key=lambda r: math.hypot(r['center'][0] - region_center[0],
|
||||||
r['center'][1] - region_center[1]))
|
r['center'][1] - region_center[1]))
|
||||||
if tasks:
|
if tasks:
|
||||||
print(f"轨迹路线: 区域中心({region_center[0]:.1f}, {region_center[1]:.1f})", end="")
|
print(
|
||||||
|
f"轨迹路线: 区域中心({region_center[0]:.1f}, {region_center[1]:.1f})", end="")
|
||||||
current_pos = region_center
|
current_pos = region_center
|
||||||
car_path = []
|
car_path = []
|
||||||
for j, task in enumerate(tasks, 1):
|
for j, task in enumerate(tasks, 1):
|
||||||
current_pos = task['center']
|
current_pos = task['center']
|
||||||
car_path.append(current_pos)
|
car_path.append(current_pos)
|
||||||
print(f" -> 任务{j}({current_pos[0]:.1f}, {current_pos[1]:.1f})", end="")
|
print(
|
||||||
|
f" -> 任务{j}({current_pos[0]:.1f}, {current_pos[1]:.1f})", end="")
|
||||||
print(" -> 区域中心")
|
print(" -> 区域中心")
|
||||||
car_paths[i] = car_path
|
car_paths[i] = car_path
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user