HPCC2025/env_partion.py
2025-03-29 11:43:04 +08:00

192 lines
7.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import gymnasium as gym
from gymnasium import spaces
import numpy as np
import yaml
import math
from mTSP_solver import mTSP
from GA.ga import GA
class PartitionEnv(gym.Env):
    """Gymnasium environment that cuts a rectangular area into sub-regions.

    An episode lasts ``CUT_NUM`` steps. Every action is a single continuous
    value in [0, 1]. The first ``ROW_CUT_LIMIT`` values are interpreted as
    row cuts and the remaining ones as column cuts. After the last cut the
    resulting rectangles are handed to a genetic-algorithm multi-TSP solver
    and the reward is ``BASE_LINE - best_time``. If any rectangle has a
    negative energy-limited offloading ratio, the episode ends with a
    reward of -100 instead.
    """

    def __init__(self, config=None):
        """Load fleet parameters from ``<self.params>.yml`` and set up spaces.

        Args:
            config: Currently unused; kept for interface compatibility.
        """
        super().__init__()

        ##############################
        # Hyper-parameters that may need manual tuning
        ##############################
        self.params = 'params3'
        self.CUT_NUM = 2
        self.ROW_CUT_LIMIT = 1
        self.COL_CUT_LIMIT = 1
        self.BASE_LINE = 5000
        self.mTSP_STEPS = 10000

        # Fleet / cost-model parameters
        with open(self.params + '.yml', 'r', encoding='utf-8') as file:
            params = yaml.safe_load(file)
        self.H = params['H']
        self.W = params['W']
        self.center = (self.H / 2, self.W / 2)
        self.num_cars = params['num_cars']
        self.flight_time_factor = params['flight_time_factor']
        self.comp_time_factor = params['comp_time_factor']
        self.trans_time_factor = params['trans_time_factor']
        self.car_time_factor = params['car_time_factor']
        self.bs_time_factor = params['bs_time_factor']
        self.flight_energy_factor = params['flight_energy_factor']
        self.comp_energy_factor = params['comp_energy_factor']
        self.trans_energy_factor = params['trans_energy_factor']
        self.battery_energy_capacity = params['battery_energy_capacity']

        # Set here as well as in reset() so render() is safe before reset().
        self.phase = 0
        # Number of cut decisions taken so far, in 0..CUT_NUM.
        self.partition_step = 0
        # Cut values decided so far (row cuts first, then column cuts);
        # undecided entries are 0.
        self.partition_values = np.zeros(self.CUT_NUM, dtype=np.float32)

        # Every action is a 1-D continuous value in [0, 1].
        self.action_space = spaces.Box(
            low=0.0, high=1.0, shape=(1,), dtype=np.float32)
        # The observation is the CUT_NUM-dimensional vector of cut values.
        self.observation_space = spaces.Box(
            low=0.0, high=1.0, shape=(self.CUT_NUM,), dtype=np.float32)

        # Cut boundaries and resulting rectangles, filled in by step().
        self.col_cuts = []  # vertical cut positions, including 0 and 1
        self.row_cuts = []  # horizontal cut positions, including 0 and 1
        self.rectangles = []

    def reset(self, seed=None, options=None):
        """Reset the episode and return the all-zero observation.

        NOTE(review): Gymnasium's API expects ``(observation, info)``; only
        the observation is returned here to stay compatible with existing
        callers of this environment.
        """
        super().reset(seed=seed)
        self.phase = 0
        self.partition_step = 0
        self.partition_values = np.zeros(self.CUT_NUM, dtype=np.float32)
        self.col_cuts = []
        self.row_cuts = []
        self.rectangles = []
        # Return a copy so that later steps do not mutate the caller's state.
        return self.partition_values.copy()

    def step(self, action):
        """Record one cut value; on the final cut, evaluate the partition.

        Args:
            action: Sequence (or scalar) whose first element is the cut
                position.

        Returns:
            ``(state, reward, terminated, truncated, info)``. For
            intermediate steps the reward is 0.0 and ``info`` is ``{}``.
            For an infeasible partition the reward is -100. Otherwise the
            reward is ``BASE_LINE - best_time`` and the last element is the
            best path returned by the GA solver.
            NOTE(review): Gymnasium expects ``info`` to be a dict; the raw
            path is kept for compatibility with existing callers.
        """
        # np.ravel lets scalar / 0-d actions work as well as 1-D ones.
        cut = float(np.ravel(action)[0])
        self.partition_values[self.partition_step] = cut
        self.partition_step += 1
        # Copy so stored transitions are not overwritten by later steps.
        state = self.partition_values.copy()

        if self.partition_step < self.CUT_NUM:
            # Still choosing cuts: no reward yet.
            return state, 0.0, False, False, {}

        if not self._build_rectangles():
            return state, -100, True, False, {}

        best_path, best_time = self._plan_routes()
        reward = self.BASE_LINE - best_time
        return state, reward, True, False, best_path

    def _cut_boundaries(self, values):
        """Return sorted, de-duplicated cuts strictly inside (0, 1), plus 0 and 1."""
        # A value of 0 means "no cut". A value of 1 (or anything outside the
        # open interval) coincides with the border and would create a
        # zero-area strip, which divides by zero in the energy bound.
        inner = sorted({v for v in values if 0.0 < v < 1.0})
        return [0.0] + inner + [1.0]

    def _build_rectangles(self):
        """Compute cut boundaries and per-rectangle costs.

        Returns:
            False as soon as a rectangle has a negative energy-limited
            offloading ratio, True otherwise.
        """
        self.row_cuts = self._cut_boundaries(
            self.partition_values[:self.ROW_CUT_LIMIT])
        self.col_cuts = self._cut_boundaries(
            self.partition_values[self.ROW_CUT_LIMIT:])
        self.rectangles = []

        # The time-limited offloading ratio does not depend on the rectangle.
        rho_time_limit = (self.flight_time_factor - self.trans_time_factor) / \
            (self.comp_time_factor - self.trans_time_factor)

        for row_lo, row_hi in zip(self.row_cuts, self.row_cuts[1:]):
            for col_lo, col_hi in zip(self.col_cuts, self.col_cuts[1:]):
                # Area of the rectangle in H x W units.
                d = (col_hi - col_lo) * self.W * (row_hi - row_lo) * self.H
                rho_energy_limit = (
                    self.battery_energy_capacity
                    - self.flight_energy_factor * d
                    - self.trans_energy_factor * d
                ) / (self.comp_energy_factor * d
                     - self.trans_energy_factor * d)
                if rho_energy_limit < 0:
                    return False
                rho = min(rho_time_limit, rho_energy_limit)
                self.rectangles.append({
                    'center': ((row_lo + row_hi) * self.H / 2,
                               (col_hi + col_lo) * self.W / 2),
                    'flight_time': self.flight_time_factor * d,
                    'bs_time': self.bs_time_factor * (1 - rho) * d,
                })
        return True

    def _plan_routes(self):
        """Solve the multi-TSP over the rectangle centres with the GA solver.

        Returns:
            ``(best_path, best_time)`` as returned by ``GA.run()``.
        """
        # NOTE: an alternative Q-learning solver (mTSP(...).train(
        # self.mTSP_STEPS)) was used here previously; the GA replaces it.
        # City 0 is the area centre, followed by every rectangle centre.
        points = [self.center]
        points.extend(rec['center'] for rec in self.rectangles)
        cities = np.array(points)
        center_idx = [0]
        # Append one extra copy of the centre per additional car and
        # remember the index of each copy.
        for _ in range(self.num_cars - 1):
            # np.vstack is the non-deprecated equivalent of np.row_stack.
            cities = np.vstack((cities, self.center))
            center_idx.append(cities.shape[0] - 1)
        ga = GA(num_drones=self.num_cars, num_city=cities.shape[0],
                num_total=20, data=cities, to_process_idx=center_idx,
                rectangles=self.rectangles)
        return ga.run()

    def render(self):
        """Print a textual summary for phase 1 or 2; do nothing otherwise."""
        if self.phase == 1:
            # car_pos is not assigned anywhere in this class; read it
            # defensively so render() cannot raise AttributeError.
            car_pos = getattr(self, 'car_pos', None)
            print("Phase 1: Initialize maze environment.")
            print(f"Partition values so far: {self.partition_values}")
            print(f"Motorcade positon: {car_pos}")
        elif self.phase == 2:
            # Same defensive read for car_traj.
            car_traj = getattr(self, 'car_traj', None)
            print("Phase 2: Play maze.")
            print(f'Motorcade trajectory: {car_traj}')