改成50_50_3场景

2025-04-12 22:55:01 +08:00 · 2025-04-12 22:55:01 +08:00 · 6a82010112
commit 6a82010112
parent d64ec83042
11 changed files with 423 additions and 138 deletions
--- a/Duel_Double_DQN/main.py
+++ b/Duel_Double_DQN/main.py
@ -10,7 +10,7 @@ import torch
 import sys
 import os
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from env_partion_dist import PartitionEnv
+from env_partion_dist1 import PartitionEnv
 # fmt: on

 '''Hyperparameter Setting'''
--- a/Duel_Double_DQN/utils.py
+++ b/Duel_Double_DQN/utils.py
@ -47,7 +47,7 @@ def save_best_solution(info_lt):

    # 读取已有的最优解
    try:
-        with open('solutions/dqn_params_100_100_6.json', 'r') as f:
+        with open('solutions/dqn_params_50_50_3.json', 'r') as f:
            saved_solution = json.load(f)
            saved_time = saved_solution['best_time']
    except FileNotFoundError:
--- a/GA/ga.py
+++ b/GA/ga.py
@ -16,7 +16,7 @@ class GA(object):
        self.location = data
        self.to_process_idx = to_process_idx
        self.rectangles = rectangles
-        self.epochs = 1000
+        self.epochs = 500
        self.ga_choose_ratio = 0.2
        self.mutate_ratio = 0.05
        # fruits中存每一个个体是下标的list
@ -314,7 +314,7 @@ class GA(object):
                early_stop_cnt = 0
            else:
                early_stop_cnt += 1
-            if early_stop_cnt == 100:  # 若连续50次没有性能提升，则早停
+            if early_stop_cnt == 150:  # 若连续50次没有性能提升，则早停
                break
            self.best_record.append(1.0 / best_score)
            best_length = 1.0 / best_score
--- a/GA/use_ga.py
+++ b/GA/use_ga.py
@ -0,0 +1,93 @@
+import random
+import math
+import yaml
+import numpy as np
+from utils import if_valid_partition, GA_solver
+from itertools import product, combinations
+import json
+from tqdm import tqdm
+
+# Seed both RNGs so repeated runs are reproducible.
+np.random.seed(42)
+random.seed(42)
+
+# Best result seen so far across all candidate partitions.
+best_T = float('inf')
+best_solution = None
+best_row_boundaries = None
+best_col_boundaries = None
+# Defined up front so the final report cannot raise NameError when no
+# candidate partition is valid.
+car_paths = []
+
+# ---------------------------
+# Hyperparameters to edit
+# ---------------------------
+params_file = 'params_50_50_3'
+
+with open(params_file + '.yml', 'r', encoding='utf-8') as file:
+    params = yaml.safe_load(file)
+
+H = params['H']
+W = params['W']
+k = params['num_cars']
+
+flight_time_factor = params['flight_time_factor']
+comp_time_factor = params['comp_time_factor']
+trans_time_factor = params['trans_time_factor']
+car_time_factor = params['car_time_factor']
+bs_time_factor = params['bs_time_factor']
+
+flight_energy_factor = params['flight_energy_factor']
+comp_energy_factor = params['comp_energy_factor']
+trans_energy_factor = params['trans_energy_factor']
+battery_energy_capacity = params['battery_energy_capacity']
+
+# Candidate interior cut positions, given as fractions of the area size.
+row_cuts_set = [[0.3, 0.48, 0.77]]
+col_cuts_set = [[0.5]]
+
+for row_cuts in row_cuts_set:
+    for col_cuts in col_cuts_set:
+        row_boundaries = [0.0] + list(row_cuts) + [1.0]
+        col_boundaries = [0.0] + list(col_cuts) + [1.0]
+
+        # Per the original note, distances in the result are real distances,
+        # not ratios. A falsy result means the partition is skipped.
+        rectangles = if_valid_partition(
+            row_boundaries, col_boundaries, params)
+        if not rectangles:
+            continue
+
+        # Solve the routing for this partition with the GA and keep it only
+        # when it beats the best time so far.
+        current_solution, current_time, to_process_idx = GA_solver(
+            rectangles, params)
+        if current_time < best_T:
+            best_T = current_time
+            best_solution = current_solution
+            best_row_boundaries = row_boundaries
+            best_col_boundaries = col_boundaries
+
+            # Split best_solution into one path per car. Entries listed in
+            # to_process_idx act as separators between consecutive paths.
+            separators = [pos for pos, node in enumerate(best_solution)
+                          if node in to_process_idx]
+            car_paths = []
+            # Comprehension variables stay local, so `k` (num_cars) is no
+            # longer overwritten by a loop index.
+            for from_index, end_index in zip(separators, separators[1:]):
+                car_path = [node - 1
+                            for node in best_solution[from_index:end_index + 1]
+                            if node not in to_process_idx]
+                if car_path:
+                    car_paths.append(car_path)
+
+# Report the best plan found.
+print("Best solution:", best_solution)
+print("Time:", best_T)
+print("Row boundaries:", best_row_boundaries)
+print("Col boundaries:", best_col_boundaries)
+print("Car Paths:", car_paths)
+
+
--- a/GA/utils.py
+++ b/GA/utils.py
@ -1,5 +1,6 @@
 import numpy as np
 from ga import GA
+import matplotlib.pyplot as plt


 def if_valid_partition(row_boundaries, col_boundaries, params):
@ -91,4 +92,10 @@ def GA_solver(rectangles, params):
    if Best_path[-1] not in to_process_idx:
        Best_path.append(0)
    
+    # iterations = model.iter_x
+    # best_record = model.iter_y
+    # plt.plot(iterations, best_record)
+    # plt.show()
+    
+    
    return Best_path, Best, to_process_idx
--- a/env_partion_dist.py
+++ b/env_partion_dist.py
@ -73,37 +73,37 @@ class PartitionEnv(gym.Env):
        return state

    def step(self, action):
-        # if action == 1:
-        #     self.row_cuts[1] += 0.01
-        # elif action == 2:
-        #     self.row_cuts[1] -= 0.01
-        # elif action == 3:
-        #     self.row_cuts[2] += 0.01
-        # elif action == 4:
-        #     self.row_cuts[2] -= 0.01
-        # elif action == 5:
-        #     self.row_cuts[3] += 0.01
-        # elif action == 6:
-        #     self.row_cuts[3] -= 0.01
-        # elif action == 7:
-        #     self.col_cuts[1] += 0.01
-        # elif action == 8:
-        #     self.col_cuts[1] -= 0.01
-        # elif action == 0:
-        #     pass
-        cut_index, signal = (action + 1) // 2, (action + 1) % 2
-        if action == 0:
+        if action == 1:
+            self.row_cuts[1] += 0.01
+        elif action == 2:
+            self.row_cuts[1] -= 0.01
+        elif action == 3:
+            self.row_cuts[2] += 0.01
+        elif action == 4:
+            self.row_cuts[2] -= 0.01
+        elif action == 5:
+            self.row_cuts[3] += 0.01
+        elif action == 6:
+            self.row_cuts[3] -= 0.01
+        elif action == 7:
+            self.col_cuts[1] += 0.01
+        elif action == 8:
+            self.col_cuts[1] -= 0.01
+        elif action == 0:
            pass
-        elif cut_index <= 5:
-            if signal == 0:
-                self.col_cuts[cut_index] += 0.005
-            else:
-                self.col_cuts[cut_index] -= 0.005
-        else:
-            if signal == 0:
-                self.col_cuts[cut_index-4] += 0.005
-            else:
-                self.col_cuts[cut_index-4] -= 0.005
+        # cut_index, signal = (action + 1) // 2, (action + 1) % 2
+        # if action == 0:
+        #     pass
+        # elif cut_index <= 5:
+        #     if signal == 0:
+        #         self.col_cuts[cut_index] += 0.005
+        #     else:
+        #         self.col_cuts[cut_index] -= 0.005
+        # else:
+        #     if signal == 0:
+        #         self.col_cuts[cut_index-4] += 0.005
+        #     else:
+        #         self.col_cuts[cut_index-4] -= 0.005

        # 检查row_cuts和col_cuts是否按升序排列
        if (all(self.row_cuts[i] < self.row_cuts[i+1] for i in range(len(self.row_cuts)-1)) and
@ -115,16 +115,16 @@ class PartitionEnv(gym.Env):
                # 不满足条件，时间给一个很大的值
                best_time = self.BASE_LINE * 2
            else:
-                # # 满足条件，继续进行路径规划
-                # # 每隔10步计算一次路径，第一次也需要计算路径，记录最佳路径
-                # if self.adjust_step % 10 == 0 or self.adjust_step == 1 or self.best_path is None:
-                #     best_time, self.best_path = self.ga_solver(rectangles)
-                # else:
-                #     # 根据最佳路径计算当前时间
-                #     best_time = self.get_best_time(self.best_path, rectangles)
-                self.best_path = [33, 30, 29, 28, 27, 21, 15, 0, 13, 7, 1, 2, 31, 14, 8, 3, 4,
-                                  10, 32, 23, 22, 24, 18, 17, 16, 35, 9, 12, 6, 5, 11, 34, 20, 25, 26, 19, 0]
+                # 满足条件，继续进行路径规划
+                # 每隔10步计算一次路径，第一次也需要计算路径，记录最佳路径
+                if self.adjust_step % 10 == 0 or self.best_path is None:
+                    best_time, self.best_path = self.ga_solver(rectangles)
+                else:
+                    # 根据最佳路径计算当前时间
                    best_time = self.get_best_time(self.best_path, rectangles)
+                # self.best_path = [33, 30, 29, 28, 27, 21, 15, 0, 13, 7, 1, 2, 31, 14, 8, 3, 4,
+                #                   10, 32, 23, 22, 24, 18, 17, 16, 35, 9, 12, 6, 5, 11, 34, 20, 25, 26, 19, 0]
+                # best_time = self.get_best_time(self.best_path, rectangles)

        else:
            # 调整不合法，时间给一个很大的值
--- a/env_partion_dist1.py
+++ b/env_partion_dist1.py
@ -0,0 +1,275 @@
+import gymnasium as gym
+from gymnasium import spaces
+import numpy as np
+import yaml
+import math
+from mTSP_solver import mTSP
+from GA.ga import GA
+
+
+class PartitionEnv(gym.Env):
+    """
+    自定义环境，分为两阶段：
+    区域切分，每一次切分都是(0, 1)之间的连续值
+    """
+
+    def __init__(self, config=None):
+        """Set up spaces, the initial cuts and the cost factors from YAML.
+
+        `config` is accepted but not used by this constructor.
+        """
+        super().__init__()
+        ##############################
+        # Hyperparameters that may need manual tuning
+        ##############################
+        self.params = 'params_50_50_3'
+        self.ORI_ROW_CUTS = [0, 0.1, 0.4, 0.7, 1]
+        self.ORI_COL_CUTS = [0, 0.5, 1]
+        self.CUT_NUM = 4
+        self.BASE_LINE = 9051.163
+        self.MAX_ADJUST_STEP = 50
+
+        # One +0.01 and one -0.01 action per cut, plus action 0 (no-op).
+        self.action_space = spaces.Discrete(2 * self.CUT_NUM + 1)
+        # The observation holds every row boundary followed by every column
+        # boundary (5 + 3 = 8 values with the defaults above).
+        obs_dim = len(self.ORI_ROW_CUTS) + len(self.ORI_COL_CUTS)
+        self.observation_space = spaces.Box(
+            low=0.0, high=1.0, shape=(obs_dim,), dtype=np.float32)
+
+        # Mutable episode state; the ORI_* lists are copied, never aliased.
+        self.row_cuts = list(self.ORI_ROW_CUTS)
+        self.col_cuts = list(self.ORI_COL_CUTS)
+        self.rectangles = []
+        self.adjust_step = 0
+        self.best_path = None
+
+        # Scenario settings live in '<self.params>.yml'.
+        with open(self.params + '.yml', 'r', encoding='utf-8') as fh:
+            cfg = yaml.safe_load(fh)
+
+        self.H, self.W = cfg['H'], cfg['W']
+        self.center = (self.H / 2, self.W / 2)
+        self.num_cars = cfg['num_cars']
+
+        # Time and energy cost factors are copied over under the same names.
+        for key in ('flight_time_factor', 'comp_time_factor', 'trans_time_factor',
+                    'car_time_factor', 'bs_time_factor', 'flight_energy_factor',
+                    'comp_energy_factor', 'trans_energy_factor',
+                    'battery_energy_capacity'):
+            setattr(self, key, cfg[key])
+
+    def reset(self, seed=None, options=None):
+        """Restore the initial cuts and return the state (row cuts, then col cuts).
+
+        `seed` and `options` are accepted but unused here. Only the state is
+        returned (no info dict).
+        """
+        self.adjust_step = 0
+        self.best_path = None
+        self.rectangles = []
+        self.row_cuts = list(self.ORI_ROW_CUTS)
+        self.col_cuts = list(self.ORI_COL_CUTS)
+        return np.array(self.row_cuts + self.col_cuts)
+
+    def step(self, action):
+        """Apply one cut adjustment and score the resulting partition.
+
+        Args:
+            action: 0 leaves the cuts unchanged. 1/2 move row_cuts[1] by
+                +0.01/-0.01, 3/4 move row_cuts[2], 5/6 move row_cuts[3] and
+                7/8 move col_cuts[1]. Any other value changes nothing.
+
+        Returns:
+            (state, reward, terminated, truncated, info). `terminated` turns
+            True once MAX_ADJUST_STEP steps have been taken; `truncated` is
+            always False. info carries 'row_cuts', 'col_cuts', 'best_path'
+            and 'best_time'.
+
+        A partition with mis-ordered or rejected cuts is scored with the
+        penalty time 2 * BASE_LINE.
+        """
+        if action == 1:
+            self.row_cuts[1] += 0.01
+        elif action == 2:
+            self.row_cuts[1] -= 0.01
+        elif action == 3:
+            self.row_cuts[2] += 0.01
+        elif action == 4:
+            self.row_cuts[2] -= 0.01
+        elif action == 5:
+            self.row_cuts[3] += 0.01
+        elif action == 6:
+            self.row_cuts[3] -= 0.01
+        elif action == 7:
+            self.col_cuts[1] += 0.01
+        elif action == 8:
+            self.col_cuts[1] -= 0.01
+        elif action == 0:
+            pass
+
+        # Boundaries must stay strictly increasing for the partition to exist.
+        rows_sorted = all(
+            lo < hi for lo, hi in zip(self.row_cuts, self.row_cuts[1:]))
+        cols_sorted = all(
+            lo < hi for lo, hi in zip(self.col_cuts, self.col_cuts[1:]))
+        # Falsy when some cell fails the check in if_valid_partition.
+        rectangles = self.if_valid_partition() if rows_sorted and cols_sorted else []
+
+        if not rectangles:
+            # Invalid ordering or rejected partition: charge a large penalty time.
+            best_time = self.BASE_LINE * 2
+        elif self.adjust_step % 10 == 0 or self.best_path is None:
+            # Re-plan the route on every 10th step and whenever none is cached.
+            best_time, self.best_path = self.ga_solver(rectangles)
+        else:
+            # Otherwise only re-evaluate the cached route on the new partition.
+            best_time = self.get_best_time(self.best_path, rectangles)
+
+        reward = self.calc_reward(best_time)
+        self.adjust_step += 1
+        state = np.array(self.row_cuts + self.col_cuts)
+        # Hand out copies of the cut lists: they are mutated in place by later
+        # steps, which would silently rewrite info dicts kept by the caller.
+        info = {
+            'row_cuts': list(self.row_cuts),
+            'col_cuts': list(self.col_cuts),
+            'best_path': self.best_path,
+            'best_time': best_time,
+        }
+
+        terminated = self.adjust_step >= self.MAX_ADJUST_STEP
+        return state, reward, terminated, False, info
+
+    def if_valid_partition(self):
+        """Build the per-cell records for the current cuts.
+
+        Returns a list with one dict ('center', 'flight_time', 'bs_time') per
+        grid cell in row-major order, or [] as soon as a cell's energy-limited
+        ratio is negative.
+        """
+        cells = []
+        for top, bottom in zip(self.row_cuts, self.row_cuts[1:]):
+            for left, right in zip(self.col_cuts, self.col_cuts[1:]):
+                # Cell area in real units (fractions scaled by W and H).
+                d = (right - left) * self.W * (bottom - top) * self.H
+                time_cap = (self.flight_time_factor - self.trans_time_factor) / \
+                    (self.comp_time_factor - self.trans_time_factor)
+                energy_cap = (self.battery_energy_capacity - self.flight_energy_factor * d - self.trans_energy_factor * d) / \
+                    (self.comp_energy_factor * d - self.trans_energy_factor * d)
+                if energy_cap < 0:
+                    return []
+                rho = min(time_cap, energy_cap)
+                cells.append({
+                    'center': ((top + bottom) * self.H / 2, (right + left) * self.W / 2),
+                    'flight_time': self.flight_time_factor * d,
+                    'bs_time': self.bs_time_factor * (1 - rho) * d,
+                })
+        return cells
+
+    def check_adjustment_threshold(self, threshold=0.1):
+        """Tell whether the cuts have drifted too far from their initial values.
+
+        The absolute deviations of every row and column cut from ORI_ROW_CUTS
+        and ORI_COL_CUTS are added up into one total, which is then compared
+        with the threshold.
+
+        Args:
+            threshold (float): largest total deviation that is still accepted.
+        Returns:
+            bool: True if the total deviation is strictly above threshold.
+        """
+        total = 0
+        # Rows first, then columns, accumulated in that order.
+        for current, original in ((self.row_cuts, self.ORI_ROW_CUTS),
+                                  (self.col_cuts, self.ORI_COL_CUTS)):
+            for idx, value in enumerate(current):
+                total += abs(value - original[idx])
+
+        # bool() keeps the return type a plain True/False.
+        return bool(total > threshold)
+
+    # def q_learning_solver(self):
+        # 使用q_learning解多旅行商
+        # cities: [[x1, x2, x3...], [y1, y2, y3...]] 城市坐标
+        # rec_center_lt = [rec_info['center']
+        #                  for rec_info in rectangles]
+        # cities = np.column_stack(rec_center_lt)
+        # cities = np.column_stack((self.center, cities))
+
+        # center_idx = []
+        # for i in range(self.num_cars - 1):
+        #     cities = np.column_stack((cities, self.center))
+        #     center_idx.append(cities.shape[1] - 1)
+
+        # tsp = mTSP(params=self.params, num_cities=cities.shape[1], cities=cities, num_cars=self.num_cars,
+        #            center_idx=center_idx, rectangles=rectangles)
+
+        # best_time, best_path = tsp.train(self.mTSP_STEPS)
+
+    def ga_solver(self, rectangles):
+        """Plan routes over the cell centers with the GA.
+
+        Returns (best_time, best_path); GA.run() yields them in the opposite order.
+        """
+        # City 0 and the num_cars - 1 trailing cities are self.center; built in one
+        # go because np.row_stack is deprecated since NumPy 2.0.
+        extra = self.num_cars - 1
+        nodes = [self.center] + [rec['center'] for rec in rectangles]
+        center_idx = [0] + list(range(len(nodes), len(nodes) + extra))
+        cities = np.array(nodes + [self.center] * extra)
+        ga = GA(num_drones=self.num_cars, num_city=cities.shape[0], num_total=20,
+                data=cities, to_process_idx=center_idx, rectangles=rectangles)
+        best_path, best_time = ga.run()
+        return best_time, best_path
+
+    def get_best_time(self, best_path, rectangles):
+        """Evaluate a fixed route on the given cells.
+
+        Returns ga.compute_pathlen(best_path) for a GA built over these cells.
+        """
+        # City 0 and the num_cars - 1 trailing cities are self.center; built in one
+        # go because np.row_stack is deprecated since NumPy 2.0.
+        extra = self.num_cars - 1
+        nodes = [self.center] + [rec['center'] for rec in rectangles]
+        center_idx = [0] + list(range(len(nodes), len(nodes) + extra))
+        cities = np.array(nodes + [self.center] * extra)
+
+        ga = GA(num_drones=self.num_cars, num_city=cities.shape[0], num_total=20,
+                data=cities, to_process_idx=center_idx, rectangles=rectangles)
+        return ga.compute_pathlen(best_path)
+
+    def calc_reward(self, best_time):
+        """Map a route time to a reward in [-1, 1].
+
+        The reward is tanh((BASE_LINE - best_time) / 200), so:
+        1. a time below the baseline gives a positive reward;
+        2. a time above the baseline gives a negative reward;
+        3. a time equal to the baseline gives exactly 0.
+
+        Args:
+            best_time (float): time of the current route.
+        Returns:
+            float: the reward value.
+        """
+        # 200 is the scale factor (tunable). tanh is already close to 1 once
+        # its argument reaches 2, i.e. for a time difference of about 400.
+        scale = 200
+
+        # Positive when the route is faster than the baseline.
+        gap = self.BASE_LINE - best_time
+
+        # No extra weighting is applied on top of the normalised gap.
+        reward = np.tanh(gap / scale)
+        return reward
+
+    def render(self):
+        """Print the current cuts, step counter and cached route.
+
+        Only attributes assigned in __init__/reset/step are read here, so the
+        method can be called at any time after construction.
+        """
+        print(f"Adjust step: {self.adjust_step}/{self.MAX_ADJUST_STEP}")
+        print(f"Row cuts: {self.row_cuts}")
+        print(f"Col cuts: {self.col_cuts}")
+        print(f"Best path: {self.best_path}")
--- a/human_action.py
+++ b/human_action.py
@ -1,6 +1,6 @@
 # from env import PartitionMazeEnv
 # from env_dis import PartitionMazeEnv
-from env_partion_dist import PartitionEnv
+from env_partion_dist1 import PartitionEnv

 # env = PartitionMazeEnv()
 env = PartitionEnv()
@ -13,7 +13,7 @@ print('state:', state)
 # action_series = [1] * 30
 # action_series = [[0.2], [0.4], [0.7], [0.5]]
 # action_series = [[-0.08], [-0.08], [0], [0]]
-action_series = list(range(11))
+action_series = list(range(9))

 for i in range(100):
    action = action_series[i]
--- a/solutions/dqn_params_100_100_6.json
+++ b/solutions/dqn_params_100_100_6.json
@ -1,60 +0,0 @@
-{
-    "best_time": 19376.05694186515,
-    "row_cuts": [
-        0,
-        0.2800000000000001,
-        0.43000000000000005,
-        0.62,
-        0.77,
-        1
-    ],
-    "col_cuts": [
-        0,
-        0.2,
-        0.4,
-        0.5,
-        0.7,
-        0.8,
-        1
-    ],
-    "best_path": [
-        33,
-        30,
-        29,
-        28,
-        27,
-        21,
-        15,
-        0,
-        13,
-        7,
-        1,
-        2,
-        31,
-        14,
-        8,
-        3,
-        4,
-        10,
-        32,
-        23,
-        22,
-        24,
-        18,
-        17,
-        16,
-        35,
-        9,
-        12,
-        6,
-        5,
-        11,
-        34,
-        20,
-        25,
-        26,
-        19,
-        0
-    ],
-    "timestamp": "2025-04-06 09:10:53"
-}
--- a/solutions/dqn_params_50_50_3.json
+++ b/solutions/dqn_params_50_50_3.json
@ -1,30 +0,0 @@
-{
-    "best_time": 8820.015746422654,
-    "row_cuts": [
-        0,
-        0.2900000000000001,
-        0.4700000000000001,
-        0.77,
-        1
-    ],
-    "col_cuts": [
-        0,
-        0.5,
-        1
-    ],
-    "best_path": [
-        0,
-        1,
-        3,
-        5,
-        9,
-        7,
-        8,
-        10,
-        2,
-        4,
-        6,
-        0
-    ],
-    "timestamp": "2025-04-03 10:58:44"
-}
--- a/visualization.py
+++ b/visualization.py
@ -199,8 +199,8 @@ if __name__ == "__main__":
    # ---------------------------
    # 需要修改的超参数
    # ---------------------------
-    params_file = 'params_100_100_6'
-    solution_file = r'solutions\finetune_params_100_100_6.json'
+    params_file = 'params_50_50_3'
+    solution_file = r'solutions\dqn_params_100_100_6.json'

    with open(params_file + '.yml', 'r', encoding='utf-8') as file:
        params = yaml.safe_load(file)