import gymnasium as gym
from gymnasium import spaces
import numpy as np
import yaml
import math
from mTSP_solver import mTSP
from GA.ga import GA


class PartitionEnv(gym.Env):
    """Environment that partitions a rectangular area with axis-aligned cuts.

    An episode lasts ``CUT_NUM`` steps. Each step supplies one continuous
    value in [0, 1] that is stored as a cut position. The first
    ``ROW_CUT_LIMIT`` values are row cuts and the remaining values are column
    cuts; a value of 0 means "no cut". After the last step the area is split
    into rectangles, each rectangle is checked against the energy limit, and
    a genetic algorithm (``GA``) plans routes over the rectangle centres to
    produce the final reward.
    """

    def __init__(self, config=None):
        """Load the scenario parameters and define the spaces.

        Args:
            config: Unused; accepted so the environment can be constructed by
                frameworks that pass a config object.

        Raises:
            OSError: If ``<self.params>.yml`` cannot be opened.
            KeyError: If a required key is missing from the YAML file.
        """
        super().__init__()
        ##############################
        # Hyper-parameters that may need manual tuning
        ##############################
        self.params = 'params3'      # base name of the YAML parameter file
        self.CUT_NUM = 2             # total number of cut decisions per episode
        self.ROW_CUT_LIMIT = 1       # leading entries of partition_values used as row cuts
        self.COL_CUT_LIMIT = 1       # NOTE(review): not read in this class; column cuts are "the rest"
        self.BASE_LINE = 5000        # reward is BASE_LINE - best_time
        self.mTSP_STEPS = 10000      # kept for the alternative mTSP solver (not used by the GA path)

        # Scenario parameters
        with open(self.params + '.yml', 'r', encoding='utf-8') as file:
            params = yaml.safe_load(file)

        self.H = params['H']
        self.W = params['W']
        self.center = (self.H/2, self.W/2)
        self.num_cars = params['num_cars']

        self.flight_time_factor = params['flight_time_factor']
        self.comp_time_factor = params['comp_time_factor']
        self.trans_time_factor = params['trans_time_factor']
        self.car_time_factor = params['car_time_factor']
        self.bs_time_factor = params['bs_time_factor']

        self.flight_energy_factor = params['flight_energy_factor']
        self.comp_energy_factor = params['comp_energy_factor']
        self.trans_energy_factor = params['trans_energy_factor']
        self.battery_energy_capacity = params['battery_energy_capacity']

        # Set here as well as in reset() so render() is safe before reset().
        self.phase = 0
        # Number of cut decisions taken so far in the current episode.
        self.partition_step = 0
        # Cut values decided so far; undecided entries stay 0.
        self.partition_values = np.zeros(self.CUT_NUM, dtype=np.float32)

        # Every action is a single continuous value in [0, 1].
        self.action_space = spaces.Box(
            low=0.0, high=1.0, shape=(1,), dtype=np.float32)

        # The observation is the vector of cut values (length CUT_NUM).
        self.observation_space = spaces.Box(
            low=0.0, high=1.0, shape=(self.CUT_NUM,), dtype=np.float32)

        # Partition results, filled in by the final step of an episode.
        self.col_cuts = []     # column boundaries, always including 0 and 1
        self.row_cuts = []     # row boundaries, always including 0 and 1
        self.rectangles = []   # one dict per rectangle: center, flight_time, bs_time

    def reset(self, seed=None, options=None):
        """Start a new episode and return the initial (all-zero) observation.

        Args:
            seed: Optional seed forwarded to ``gym.Env.reset``.
            options: Unused.

        Returns:
            A fresh copy of the zeroed cut vector.
        """
        # NOTE(review): the Gymnasium API expects ``(observation, info)`` here.
        # The bare observation is kept because callers outside this file may
        # rely on it; confirm before switching.
        super().reset(seed=seed)

        self.phase = 0
        self.partition_step = 0
        self.partition_values = np.zeros(self.CUT_NUM, dtype=np.float32)
        self.col_cuts = []
        self.row_cuts = []
        self.rectangles = []

        # Return a copy so that later in-place writes in step() cannot alter
        # an observation the caller already holds.
        return self.partition_values.copy()

    def step(self, action):
        """Record one cut value; on the last cut, evaluate the partition.

        Args:
            action: Indexable whose first element is the cut position.

        Returns:
            ``(state, reward, terminated, truncated, info)``.

            * Before the last cut: reward 0.0, not terminated, empty info.
            * Invalid partition (a rectangle's energy limit is negative):
              reward -100, terminated, empty info.
            * Valid partition: reward ``BASE_LINE - best_time``, terminated,
              and the fifth element is the ``best_path`` returned by the GA.
        """
        self.partition_values[self.partition_step] = float(action[0])
        self.partition_step += 1

        # Still collecting cuts: no reward yet.
        if self.partition_step < self.CUT_NUM:
            return self.partition_values.copy(), 0.0, False, False, {}

        # All cuts decided: drop zeros ("no cut"), de-duplicate and sort.
        row_values = sorted(
            {v for v in self.partition_values[:self.ROW_CUT_LIMIT] if v > 0})
        col_values = sorted(
            {v for v in self.partition_values[self.ROW_CUT_LIMIT:] if v > 0})

        # Boundaries always include 0 and 1.
        self.row_cuts = [0.0] + row_values + [1.0]
        self.col_cuts = [0.0] + col_values + [1.0]

        if not self._build_rectangles():
            return self.partition_values.copy(), -100, True, False, {}

        # Route planning with the genetic algorithm. (An alternative
        # Q-learning based solver, ``mTSP``, is imported by this module but
        # not used on this path.)
        # City 0 is the depot (area centre), followed by the rectangle
        # centres, followed by one extra copy of the depot per additional car.
        num_rects = len(self.rectangles)
        extra_depots = max(self.num_cars - 1, 0)
        cities = np.array(
            [self.center]
            + [rec['center'] for rec in self.rectangles]
            + [self.center] * extra_depots)
        center_idx = [0] + list(
            range(num_rects + 1, num_rects + 1 + extra_depots))

        ga = GA(num_drones=self.num_cars, num_city=cities.shape[0], num_total=20,
                data=cities, to_process_idx=center_idx, rectangles=self.rectangles)

        best_path, best_time = ga.run()

        reward = self.BASE_LINE - best_time

        # NOTE(review): Gymnasium expects the info slot to be a dict; the raw
        # best_path is returned as before to stay compatible with callers.
        return self.partition_values.copy(), reward, True, False, best_path

    def _build_rectangles(self):
        """Fill ``self.rectangles`` from the current cuts.

        Returns:
            False as soon as a rectangle has a negative energy limit (the
            rectangles appended so far are left in place), otherwise True.
        """
        # Independent of the rectangle, so computed once.
        rho_time_limit = (self.flight_time_factor - self.trans_time_factor) / \
            (self.comp_time_factor - self.trans_time_factor)

        for r0, r1 in zip(self.row_cuts[:-1], self.row_cuts[1:]):
            for c0, c1 in zip(self.col_cuts[:-1], self.col_cuts[1:]):
                # Area of the rectangle in the H x W plane.
                d = (c1 - c0) * self.W * \
                    (r1 - r0) * self.H
                rho_energy_limit = (self.battery_energy_capacity - self.flight_energy_factor * d - self.trans_energy_factor * d) / \
                    (self.comp_energy_factor * d -
                        self.trans_energy_factor * d)
                if rho_energy_limit < 0:
                    return False

                # Offloading ratio: the tighter of the time and energy limits.
                rho = min(rho_time_limit, rho_energy_limit)

                self.rectangles.append({
                    'center': ((r0 + r1) * self.H / 2, (c1 + c0) * self.W / 2),
                    'flight_time': self.flight_time_factor * d,
                    'bs_time': self.bs_time_factor * (1 - rho) * d,
                })
        return True

    def render(self):
        """Print debug information for phases 1 and 2; other phases print nothing."""
        phase = getattr(self, 'phase', 0)
        # car_pos / car_traj are not assigned anywhere in this class, so fall
        # back to None instead of raising AttributeError.
        if phase == 1:
            print("Phase 1: Initialize maze environment.")
            print(f"Partition values so far: {self.partition_values}")
            print(f"Motorcade positon: {getattr(self, 'car_pos', None)}")
        elif phase == 2:
            print("Phase 2: Play maze.")
            print(f'Motorcade trajectory: {getattr(self, "car_traj", None)}')