# NOTE: scrape metadata removed (file was 89 lines, 2.8 KiB, Python).
import numpy as np

import gym
from gym import spaces

class PartitionEnv(gym.Env):
    """Region-partitioning environment (first layer).

    The agent picks how many vertical and horizontal cuts to apply to a
    rectangular H x W region. The resulting sub-areas are handed to the
    second layer (``AllocationEnv``), which assigns them to ``k`` systems
    and reports a total completion time; the negated completion time is
    the reward.
    """

    def __init__(self, H=20, W=25, k=1, max_steps=1000):
        """Create the environment.

        Args:
            H: Region height (default 20, the original fixed value).
            W: Region width (default 25, the original fixed value).
            k: Number of systems the second layer allocates work to.
            max_steps: Episode length cap before ``done`` is signalled.
        """
        super(PartitionEnv, self).__init__()

        # Environment parameters (previously hard-coded; now configurable
        # with backward-compatible defaults).
        self.H = H  # region height
        self.W = W  # region width
        self.k = k  # number of systems

        # Action space: [vertical cut count, horizontal cut count],
        # each in [1, 5].
        self.action_space = spaces.Box(
            low=np.array([1, 1], dtype=np.float32),
            high=np.array([5, 5], dtype=np.float32),
            dtype=np.float32,
        )

        # Observation space: [current vertical cuts, current horizontal
        # cuts, current maximum completion time].
        self.observation_space = spaces.Box(
            low=np.array([1, 1, 0], dtype=np.float32),
            high=np.array([5, 5, np.inf], dtype=np.float32),
            dtype=np.float32,
        )

        self.state = None
        self.current_step = 0
        self.max_steps = max_steps

    def generate_subareas(self, v_cuts, h_cuts):
        """Split the H x W region into a (v_cuts x h_cuts) grid.

        Args:
            v_cuts: Number of divisions along the height axis (>= 1).
            h_cuts: Number of divisions along the width axis (>= 1).

        Returns:
            A list of dicts, one per cell, with keys ``'height'``,
            ``'width'``, ``'area'`` and ``'center'`` (a ``(y, x)`` tuple).
        """
        v_boundaries = np.linspace(0, self.H, v_cuts + 1)
        h_boundaries = np.linspace(0, self.W, h_cuts + 1)

        subareas = []
        # linspace yields v_cuts + 1 boundary points, hence v_cuts cells.
        for i in range(v_cuts):
            for j in range(h_cuts):
                height = v_boundaries[i + 1] - v_boundaries[i]
                width = h_boundaries[j + 1] - h_boundaries[j]
                center_y = (v_boundaries[i] + v_boundaries[i + 1]) / 2
                center_x = (h_boundaries[j] + h_boundaries[j + 1]) / 2

                subareas.append({
                    'height': height,
                    'width': width,
                    'area': height * width,
                    'center': (center_y, center_x),
                })
        return subareas

    def step(self, action):
        """Apply one partitioning action.

        Returns:
            ``(state, reward, done, info)`` in the classic gym 4-tuple form.
        """
        self.current_step += 1

        # Clip to the declared action bounds before truncating to int, so an
        # out-of-range policy output cannot yield 0 cuts (an empty grid) or
        # more cuts than the action space advertises.
        action = np.clip(action, self.action_space.low, self.action_space.high)
        v_cuts = int(action[0])  # vertical cut count
        h_cuts = int(action[1])  # horizontal cut count

        # Generate the sub-areas for this partition.
        subareas = self.generate_subareas(v_cuts, h_cuts)

        # Delegate to the second layer (task allocation). Imported locally —
        # presumably to avoid a circular import between the two environment
        # modules; verify before hoisting to module level.
        from env_allocation import AllocationEnv
        alloc_env = AllocationEnv(subareas, self.k)
        total_time, valid = alloc_env.optimize()

        # Reward: penalize infeasible allocations heavily; otherwise the
        # (negated) completion time, so shorter schedules score higher.
        if not valid:
            reward = -10000  # penalty for an invalid plan
            done = True
        else:
            reward = -total_time
            done = self.current_step >= self.max_steps

        # NOTE(review): when `valid` is False, `total_time` comes from a
        # failed optimization and may be meaningless; it is still recorded
        # here to preserve the original behavior.
        self.state = np.array([v_cuts, h_cuts, total_time], dtype=np.float32)

        return self.state, reward, done, {}

    def reset(self):
        """Reset to the minimal partition (1 x 1 grid, zero completion time)."""
        self.state = np.array([1, 1, 0], dtype=np.float32)
        self.current_step = 0
        return self.state

    def render(self, mode='human'):
        """Rendering is not implemented for this environment."""
        pass
|