import numpy as np
import gym
from gym import spaces

class PartitionEnv(gym.Env):
    """Area-partitioning environment (first layer).

    Each step the agent proposes how many equal strips to split a
    rectangular ``H x W`` area into along each axis. The resulting sub-areas
    are handed to the second layer (``AllocationEnv`` from the
    ``env_allocation`` module), whose ``optimize()`` result determines the
    reward.

    Action: ``[v_cuts, h_cuts]``, continuous values in ``[1, max_cuts]``
    that are rounded to the nearest integer and clipped to that range.
    Observation: ``[v_cuts, h_cuts, total_time]`` as ``float32``.
    """

    # Reward returned when the allocation layer reports an invalid plan.
    INVALID_PENALTY = -10000.0

    def __init__(self, H=20, W=25, k=1, max_cuts=5, max_steps=1000):
        """Create the environment.

        Args:
            H: Height of the area to partition.
            W: Width of the area to partition.
            k: Number of systems, forwarded to the allocation layer.
            max_cuts: Upper bound of both action components (lower bound is 1).
            max_steps: Number of steps after which an episode ends.

        Raises:
            ValueError: If ``max_cuts`` is smaller than 1.
        """
        super().__init__()

        if max_cuts < 1:
            raise ValueError("max_cuts must be at least 1")

        # Environment parameters.
        self.H = H  # area height
        self.W = W  # area width
        self.k = k  # number of systems
        self.max_cuts = int(max_cuts)

        # Action space: [split count along the height, split count along the width].
        # Bounds are built as float32 so they match the declared dtype exactly.
        self.action_space = spaces.Box(
            low=np.array([1, 1], dtype=np.float32),
            high=np.array([self.max_cuts, self.max_cuts], dtype=np.float32),
            dtype=np.float32,
        )

        # Observation space: [current v_cuts, current h_cuts, current total time].
        self.observation_space = spaces.Box(
            low=np.array([1, 1, 0], dtype=np.float32),
            high=np.array([self.max_cuts, self.max_cuts, np.inf], dtype=np.float32),
            dtype=np.float32,
        )

        self.state = None
        self.current_step = 0
        self.max_steps = max_steps

    def _parse_action(self, action):
        """Convert a continuous action into two integer counts in ``[1, max_cuts]``.

        Values are rounded to the nearest integer rather than truncated, so
        the upper bound is reachable by a continuous policy, and then clipped
        so an out-of-range action can never yield a zero or negative count.

        Raises:
            ValueError: If either of the first two components is NaN.
        """
        raw = (float(action[0]), float(action[1]))
        if np.isnan(raw[0]) or np.isnan(raw[1]):
            raise ValueError("action contains NaN: {!r}".format(action))
        v_cuts, h_cuts = (
            int(np.clip(np.rint(value), 1, self.max_cuts)) for value in raw
        )
        return v_cuts, h_cuts

    def generate_subareas(self, v_cuts, h_cuts):
        """Split the area into a ``v_cuts x h_cuts`` grid of equal sub-areas.

        Args:
            v_cuts: Number of equal strips along the height ``H``.
            h_cuts: Number of equal strips along the width ``W``.

        Returns:
            A list of dicts in row-major order, each with the keys
            ``'height'``, ``'width'``, ``'area'`` and ``'center'`` (a
            ``(center_y, center_x)`` tuple).
        """
        v_boundaries = np.linspace(0, self.H, v_cuts + 1)
        h_boundaries = np.linspace(0, self.W, h_cuts + 1)

        subareas = []
        # Pair every boundary with its successor to get each strip's extent.
        for top, bottom in zip(v_boundaries[:-1], v_boundaries[1:]):
            height = bottom - top
            center_y = (top + bottom) / 2
            for left, right in zip(h_boundaries[:-1], h_boundaries[1:]):
                width = right - left
                subareas.append({
                    'height': height,
                    'width': width,
                    'area': height * width,
                    'center': (center_y, (left + right) / 2),
                })
        return subareas

    def step(self, action):
        """Apply a partition action and score it through the allocation layer.

        Args:
            action: Sequence whose first two entries are the requested split
                counts along the height and the width.

        Returns:
            ``(observation, reward, done, info)``. The reward is
            ``-total_time`` for a valid plan and ``INVALID_PENALTY`` otherwise.
            An invalid plan ends the episode immediately; a valid one ends it
            once ``max_steps`` steps have been taken.
        """
        self.current_step += 1

        # Decode the action into integer split counts within bounds.
        v_cuts, h_cuts = self._parse_action(action)

        # Build the sub-areas for this partition.
        subareas = self.generate_subareas(v_cuts, h_cuts)

        # Ask the second layer (task allocation) to evaluate the partition.
        # Imported here, as in the original, rather than at module import time.
        from env_allocation import AllocationEnv
        alloc_env = AllocationEnv(subareas, self.k)
        total_time, valid = alloc_env.optimize()

        # Compute the reward.
        if valid:
            reward = -total_time  # negative completion time as the reward
            done = self.current_step >= self.max_steps
        else:
            reward = self.INVALID_PENALTY  # penalize invalid plans
            done = True

        # Update the state, using the dtype declared by observation_space.
        self.state = np.array([v_cuts, h_cuts, total_time], dtype=np.float32)

        return self.state, reward, done, {}

    def reset(self):
        """Reset the step counter and return the initial observation ``[1, 1, 0]``."""
        self.state = np.array([1, 1, 0], dtype=np.float32)
        self.current_step = 0
        return self.state

    def render(self, mode='human'):
        """Rendering is not implemented; this is a no-op."""
        pass