diff --git a/Duel_Double_DQN/main.py b/Duel_Double_DQN/main.py index d54aa54..7e769ad 100644 --- a/Duel_Double_DQN/main.py +++ b/Duel_Double_DQN/main.py @@ -10,7 +10,7 @@ import torch import sys import os sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from env_partion_dist import PartitionEnv +from env_partion_dist1 import PartitionEnv # fmt: on '''Hyperparameter Setting''' diff --git a/Duel_Double_DQN/utils.py b/Duel_Double_DQN/utils.py index e8c6d29..3214558 100644 --- a/Duel_Double_DQN/utils.py +++ b/Duel_Double_DQN/utils.py @@ -47,7 +47,7 @@ def save_best_solution(info_lt): # 读取已有的最优解 try: - with open('solutions/dqn_params_100_100_6.json', 'r') as f: + with open('solutions/dqn_params_50_50_3.json', 'r') as f: saved_solution = json.load(f) saved_time = saved_solution['best_time'] except FileNotFoundError: diff --git a/GA/ga.py b/GA/ga.py index 8add8dc..3d7bb9f 100644 --- a/GA/ga.py +++ b/GA/ga.py @@ -16,7 +16,7 @@ class GA(object): self.location = data self.to_process_idx = to_process_idx self.rectangles = rectangles - self.epochs = 1000 + self.epochs = 500 self.ga_choose_ratio = 0.2 self.mutate_ratio = 0.05 # fruits中存每一个个体是下标的list @@ -314,7 +314,7 @@ class GA(object): early_stop_cnt = 0 else: early_stop_cnt += 1 - if early_stop_cnt == 100: # 若连续50次没有性能提升,则早停 + if early_stop_cnt == 150: # 若连续50次没有性能提升,则早停 break self.best_record.append(1.0 / best_score) best_length = 1.0 / best_score diff --git a/GA/use_ga.py b/GA/use_ga.py new file mode 100644 index 0000000..d81ccc5 --- /dev/null +++ b/GA/use_ga.py @@ -0,0 +1,93 @@ +import random +import math +import yaml +import numpy as np +from utils import if_valid_partition, GA_solver +from itertools import product, combinations +import json +from tqdm import tqdm + +np.random.seed(42) +random.seed(42) +best_T = float('inf') +best_solution = None +best_row_boundaries = None +best_col_boundaries = None + + +# --------------------------- +# 需要修改的超参数 +# --------------------------- +params_file = 'params_50_50_3' + + +with open(params_file + '.yml', 'r', encoding='utf-8') as file: + params = yaml.safe_load(file) + +H = params['H'] +W = params['W'] +k = params['num_cars'] + +flight_time_factor = params['flight_time_factor'] +comp_time_factor = params['comp_time_factor'] +trans_time_factor = params['trans_time_factor'] +car_time_factor = params['car_time_factor'] +bs_time_factor = params['bs_time_factor'] + +flight_energy_factor = params['flight_energy_factor'] +comp_energy_factor = params['comp_energy_factor'] +trans_energy_factor = params['trans_energy_factor'] +battery_energy_capacity = params['battery_energy_capacity'] + +# # 定义数字列表 +# numbers = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] + +row_cuts_set = [[0.3, 0.48, 0.77]] +col_cuts_set = [[0.5]] + +for row_cuts in row_cuts_set: + for col_cuts in col_cuts_set: + row_boundaries = [0.0] + list(row_cuts) + [1.0] + col_boundaries = [0.0] + list(col_cuts) + [1.0] + + # 这里面的距离不再是比例,而是真实距离! + rectrangles = if_valid_partition( + row_boundaries, col_boundaries, params) + if not rectrangles: + continue + else: + # 使用遗传算法求出每一种网格划分的可行解,然后选择其中的最优解 + current_solution, current_time, to_process_idx = GA_solver( + rectrangles, params) + + if current_time < best_T: + best_T = current_time + best_solution = current_solution + best_row_boundaries = row_boundaries + best_col_boundaries = col_boundaries + + # 将best_solution分解成每个车队的路径 + found_start_points_indices = [] + for i in range(len(best_solution)): + if best_solution[i] in to_process_idx: + found_start_points_indices.append(i) + car_paths = [] + for j in range(len(found_start_points_indices) - 1): + from_index = found_start_points_indices[j] + end_index = found_start_points_indices[j + 1] + car_path = [] + for k in range(from_index, end_index + 1): + rectrangle_idx = best_solution[k] + if rectrangle_idx not in to_process_idx: + car_path.append(rectrangle_idx - 1) + if car_path: + car_paths.append(car_path) + +# 输出最佳方案 +print("Best solution:", best_solution) +print("Time:", best_T) +print("Row boundaries:", best_row_boundaries) +print("Col boundaries:", best_col_boundaries) +print("Car Paths:", car_paths) + + diff --git a/GA/utils.py b/GA/utils.py index 1407d80..286235f 100644 --- a/GA/utils.py +++ b/GA/utils.py @@ -1,5 +1,6 @@ import numpy as np from ga import GA +import matplotlib.pyplot as plt def if_valid_partition(row_boundaries, col_boundaries, params): @@ -91,4 +92,10 @@ def GA_solver(rectangles, params): if Best_path[-1] not in to_process_idx: Best_path.append(0) + # iterations = model.iter_x + # best_record = model.iter_y + # plt.plot(iterations, best_record) + # plt.show() + + return Best_path, Best, to_process_idx diff --git a/env_partion_dist.py b/env_partion_dist.py index f8c298b..58d550d 100644 --- a/env_partion_dist.py +++ b/env_partion_dist.py @@ -73,37 +73,37 @@ class PartitionEnv(gym.Env): return state def step(self, action): - # if action == 1: - # self.row_cuts[1] += 0.01 - # elif action == 2: - # self.row_cuts[1] -= 0.01 - # elif action == 3: - # self.row_cuts[2] += 0.01 - # elif action == 4: - # self.row_cuts[2] -= 0.01 - # elif action == 5: - # self.row_cuts[3] += 0.01 - # elif action == 6: - # self.row_cuts[3] -= 0.01 - # elif action == 7: - # self.col_cuts[1] += 0.01 - # elif action == 8: - # self.col_cuts[1] -= 0.01 - # elif action == 0: - # pass - cut_index, signal = (action + 1) // 2, (action + 1) % 2 - if action == 0: + if action == 1: + self.row_cuts[1] += 0.01 + elif action == 2: + self.row_cuts[1] -= 0.01 + elif action == 3: + self.row_cuts[2] += 0.01 + elif action == 4: + self.row_cuts[2] -= 0.01 + elif action == 5: + self.row_cuts[3] += 0.01 + elif action == 6: + self.row_cuts[3] -= 0.01 + elif action == 7: + self.col_cuts[1] += 0.01 + elif action == 8: + self.col_cuts[1] -= 0.01 + elif action == 0: pass - elif cut_index <= 5: - if signal == 0: - self.col_cuts[cut_index] += 0.005 - else: - self.col_cuts[cut_index] -= 0.005 - else: - if signal == 0: - self.col_cuts[cut_index-4] += 0.005 - else: - self.col_cuts[cut_index-4] -= 0.005 + # cut_index, signal = (action + 1) // 2, (action + 1) % 2 + # if action == 0: + # pass + # elif cut_index <= 5: + # if signal == 0: + # self.col_cuts[cut_index] += 0.005 + # else: + # self.col_cuts[cut_index] -= 0.005 + # else: + # if signal == 0: + # self.col_cuts[cut_index-4] += 0.005 + # else: + # self.col_cuts[cut_index-4] -= 0.005 # 检查row_cuts和col_cuts是否按升序排列 if (all(self.row_cuts[i] < self.row_cuts[i+1] for i in range(len(self.row_cuts)-1)) and @@ -115,16 +115,16 @@ class PartitionEnv(gym.Env): # 不满足条件,时间给一个很大的值 best_time = self.BASE_LINE * 2 else: - # # 满足条件,继续进行路径规划 - # # 每隔10步计算一次路径,第一次也需要计算路径,记录最佳路径 - # if self.adjust_step % 10 == 0 or self.adjust_step == 1 or self.best_path is None: - # best_time, self.best_path = self.ga_solver(rectangles) - # else: - # # 根据最佳路径计算当前时间 - # best_time = self.get_best_time(self.best_path, rectangles) - self.best_path = [33, 30, 29, 28, 27, 21, 15, 0, 13, 7, 1, 2, 31, 14, 8, 3, 4, - 10, 32, 23, 22, 24, 18, 17, 16, 35, 9, 12, 6, 5, 11, 34, 20, 25, 26, 19, 0] - best_time = self.get_best_time(self.best_path, rectangles) + # 满足条件,继续进行路径规划 + # 每隔10步计算一次路径,第一次也需要计算路径,记录最佳路径 + if self.adjust_step % 10 == 0 or self.best_path is None: + best_time, self.best_path = self.ga_solver(rectangles) + else: + # 根据最佳路径计算当前时间 + best_time = self.get_best_time(self.best_path, rectangles) + # self.best_path = [33, 30, 29, 28, 27, 21, 15, 0, 13, 7, 1, 2, 31, 14, 8, 3, 4, + # 10, 32, 23, 22, 24, 18, 17, 16, 35, 9, 12, 6, 5, 11, 34, 20, 25, 26, 19, 0] + # best_time = self.get_best_time(self.best_path, rectangles) else: # 调整不合法,时间给一个很大的值 diff --git a/env_partion_dist1.py b/env_partion_dist1.py new file mode 100644 index 0000000..3129e08 --- /dev/null +++ b/env_partion_dist1.py @@ -0,0 +1,275 @@ +import gymnasium as gym +from gymnasium import spaces +import numpy as np +import yaml +import math +from mTSP_solver import mTSP +from GA.ga import GA + + +class PartitionEnv(gym.Env): + """ + 自定义环境,分为两阶段: + 区域切分,每一次切分都是(0, 1)之间的连续值 + """ + + def __init__(self, config=None): + super(PartitionEnv, self).__init__() + ############################## + # 可能需要手动修改的超参数 + ############################## + self.params = 'params_50_50_3' + self.ORI_ROW_CUTS = [0, 0.1, 0.4, 0.7, 1] + self.ORI_COL_CUTS = [0, 0.5, 1] + self.CUT_NUM = 4 + self.BASE_LINE = 9051.163 + self.MAX_ADJUST_STEP = 50 + # self.ADJUST_THRESHOLD = 0.1 + # self.mTSP_STEPS = 10000 + + # 切分位置+/-0.01 + self.action_space = spaces.Discrete(self.CUT_NUM*2 + 1) + # 定义观察空间为8维向量 + self.observation_space = spaces.Box( + low=0.0, high=1.0, shape=(len(self.ORI_ROW_CUTS)+len(self.ORI_COL_CUTS),), dtype=np.float32) + + self.row_cuts = self.ORI_ROW_CUTS[:] + self.col_cuts = self.ORI_COL_CUTS[:] + self.rectangles = [] + self.adjust_step = 0 + self.best_path = None + + # 车队参数设置 + with open(self.params + '.yml', 'r', encoding='utf-8') as file: + params = yaml.safe_load(file) + + self.H = params['H'] + self.W = params['W'] + self.center = (self.H/2, self.W/2) + self.num_cars = params['num_cars'] + + self.flight_time_factor = params['flight_time_factor'] + self.comp_time_factor = params['comp_time_factor'] + self.trans_time_factor = params['trans_time_factor'] + self.car_time_factor = params['car_time_factor'] + self.bs_time_factor = params['bs_time_factor'] + + self.flight_energy_factor = params['flight_energy_factor'] + self.comp_energy_factor = params['comp_energy_factor'] + self.trans_energy_factor = params['trans_energy_factor'] + self.battery_energy_capacity = params['battery_energy_capacity'] + + def reset(self, seed=None, options=None): + # 重置所有变量,回到切分阶段(phase 0) + self.row_cuts = self.ORI_ROW_CUTS[:] + self.col_cuts = self.ORI_COL_CUTS[:] + self.rectangles = [] + self.adjust_step = 0 + self.best_path = None + + # 状态:前 4 维为 partition_values,其余为区域访问状态(初始全0) + state = np.array(self.row_cuts + self.col_cuts) + + return state + + def step(self, action): + if action == 1: + self.row_cuts[1] += 0.01 + elif action == 2: + self.row_cuts[1] -= 0.01 + elif action == 3: + self.row_cuts[2] += 0.01 + elif action == 4: + self.row_cuts[2] -= 0.01 + elif action == 5: + self.row_cuts[3] += 0.01 + elif action == 6: + self.row_cuts[3] -= 0.01 + elif action == 7: + self.col_cuts[1] += 0.01 + elif action == 8: + self.col_cuts[1] -= 0.01 + elif action == 0: + pass + # cut_index, signal = (action + 1) // 2, (action + 1) % 2 + # if action == 0: + # pass + # elif cut_index <= 5: + # if signal == 0: + # self.col_cuts[cut_index] += 0.005 + # else: + # self.col_cuts[cut_index] -= 0.005 + # else: + # if signal == 0: + # self.col_cuts[cut_index-4] += 0.005 + # else: + # self.col_cuts[cut_index-4] -= 0.005 + + # 检查row_cuts和col_cuts是否按升序排列 + if (all(self.row_cuts[i] < self.row_cuts[i+1] for i in range(len(self.row_cuts)-1)) and + all(self.col_cuts[i] < self.col_cuts[i+1] for i in range(len(self.col_cuts)-1))): + # 调整是合法的,验证分区情况是否满足条件 + rectangles = self.if_valid_partition() + + if not rectangles: + # 不满足条件,时间给一个很大的值 + best_time = self.BASE_LINE * 2 + else: + # 满足条件,继续进行路径规划 + # 每隔10步计算一次路径,第一次也需要计算路径,记录最佳路径 + if self.adjust_step % 10 == 0 or self.best_path is None: + best_time, self.best_path = self.ga_solver(rectangles) + else: + # 根据最佳路径计算当前时间 + best_time = self.get_best_time(self.best_path, rectangles) + # self.best_path = [33, 30, 29, 28, 27, 21, 15, 0, 13, 7, 1, 2, 31, 14, 8, 3, 4, + # 10, 32, 23, 22, 24, 18, 17, 16, 35, 9, 12, 6, 5, 11, 34, 20, 25, 26, 19, 0] + # best_time = self.get_best_time(self.best_path, rectangles) + + else: + # 调整不合法,时间给一个很大的值 + best_time = self.BASE_LINE * 2 + + reward = self.calc_reward(best_time) + self.adjust_step += 1 + state = np.array(self.row_cuts + self.col_cuts) + info = {'row_cuts': self.row_cuts, 'col_cuts': self.col_cuts, + 'best_path': self.best_path, 'best_time': best_time} + + if self.adjust_step < self.MAX_ADJUST_STEP: + return state, reward, False, False, info + else: + return state, reward, True, False, info + + def if_valid_partition(self): + rectangles = [] + for i in range(len(self.row_cuts) - 1): + for j in range(len(self.col_cuts) - 1): + d = (self.col_cuts[j+1] - self.col_cuts[j]) * self.W * \ + (self.row_cuts[i+1] - + self.row_cuts[i]) * self.H + rho_time_limit = (self.flight_time_factor - self.trans_time_factor) / \ + (self.comp_time_factor - self.trans_time_factor) + rho_energy_limit = (self.battery_energy_capacity - self.flight_energy_factor * d - self.trans_energy_factor * d) / \ + (self.comp_energy_factor * d - + self.trans_energy_factor * d) + if rho_energy_limit < 0: + return [] + rho = min(rho_time_limit, rho_energy_limit) + + flight_time = self.flight_time_factor * d + bs_time = self.bs_time_factor * (1 - rho) * d + + rectangles.append({ + 'center': ((self.row_cuts[i] + self.row_cuts[i+1]) * self.H / 2, (self.col_cuts[j+1] + self.col_cuts[j]) * self.W / 2), + 'flight_time': flight_time, + 'bs_time': bs_time, + }) + return rectangles + + def check_adjustment_threshold(self, threshold=0.1): + """ + 检查当前切分位置与原始切分位置的差异是否超过阈值 + Args: + threshold (float): 允许的最大调整幅度 + Returns: + bool: 如果任何切分位置的调整超过阈值,返回True + """ + # 检查行切分位置 + delta = 0 + for i in range(len(self.row_cuts)): + delta += abs(self.row_cuts[i] - self.ORI_ROW_CUTS[i]) + + # 检查列切分位置 + for i in range(len(self.col_cuts)): + delta += abs(self.col_cuts[i] - self.ORI_COL_CUTS[i]) + + if delta > threshold: + return True + + return False + + # def q_learning_solver(self): + # 使用q_learning解多旅行商 + # cities: [[x1, x2, x3...], [y1, y2, y3...]] 城市坐标 + # rec_center_lt = [rec_info['center'] + # for rec_info in rectangles] + # cities = np.column_stack(rec_center_lt) + # cities = np.column_stack((self.center, cities)) + + # center_idx = [] + # for i in range(self.num_cars - 1): + # cities = np.column_stack((cities, self.center)) + # center_idx.append(cities.shape[1] - 1) + + # tsp = mTSP(params=self.params, num_cities=cities.shape[1], cities=cities, num_cars=self.num_cars, + # center_idx=center_idx, rectangles=rectangles) + + # best_time, best_path = tsp.train(self.mTSP_STEPS) + + def ga_solver(self, rectangles): + cities = [self.center] + for rec in rectangles: + cities.append(rec['center']) + cities = np.array(cities) + + center_idx = [0] + for i in range(self.num_cars - 1): + cities = np.row_stack((cities, self.center)) + center_idx.append(cities.shape[0] - 1) + + ga = GA(num_drones=self.num_cars, num_city=cities.shape[0], num_total=20, + data=cities, to_process_idx=center_idx, rectangles=rectangles) + best_path, best_time = ga.run() + return best_time, best_path + + def get_best_time(self, best_path, rectangles): + cities = [self.center] + for rec in rectangles: + cities.append(rec['center']) + cities = np.array(cities) + + center_idx = [0] + for i in range(self.num_cars - 1): + cities = np.row_stack((cities, self.center)) + center_idx.append(cities.shape[0] - 1) + + ga = GA(num_drones=self.num_cars, num_city=cities.shape[0], num_total=20, + data=cities, to_process_idx=center_idx, rectangles=rectangles) + best_time = ga.compute_pathlen(best_path) + return best_time + + def calc_reward(self, best_time): + """ + 计算奖励: + 1. 如果时间小于基准线,给予正奖励 + 2. 如果时间大于基准线,给予负奖励 + 3. 保持归一化和折扣因子 + + Args: + best_time (float): 当前路径的时间 + Returns: + float: 计算得到的奖励值 + """ + time_diff = self.BASE_LINE - best_time + + # 使用tanh归一化,确保time_diff=0时,normalized_diff=0 + # tanh在变量值为2时,就非常接近1了。最大的time_diff为400 + normalized_diff = np.tanh(time_diff / 200) # 20是缩放因子,可调整 + + # 计算最终奖励 + reward = normalized_diff + # * step_weight # 10是缩放因子 + + return reward + + def render(self): + if self.phase == 1: + print("Phase 1: Initialize maze environment.") + print(f"Partition values so far: {self.partition_values}") + print(f"Motorcade positon: {self.car_pos}") + # input('1111') + elif self.phase == 2: + print("Phase 2: Play maze.") + print(f'Motorcade trajectory: {self.car_traj}') + # input('2222') diff --git a/human_action.py b/human_action.py index 46a6e50..34812b2 100644 --- a/human_action.py +++ b/human_action.py @@ -1,6 +1,6 @@ # from env import PartitionMazeEnv # from env_dis import PartitionMazeEnv -from env_partion_dist import PartitionEnv +from env_partion_dist1 import PartitionEnv # env = PartitionMazeEnv() env = PartitionEnv() @@ -13,7 +13,7 @@ print('state:', state) # action_series = [1] * 30 # action_series = [[0.2], [0.4], [0.7], [0.5]] # action_series = [[-0.08], [-0.08], [0], [0]] -action_series = list(range(11)) +action_series = list(range(9)) for i in range(100): action = action_series[i] diff --git a/solutions/dqn_params_100_100_6.json b/solutions/dqn_params_100_100_6.json deleted file mode 100644 index 736ce88..0000000 --- a/solutions/dqn_params_100_100_6.json +++ /dev/null @@ -1,60 +0,0 @@ -{ - "best_time": 19376.05694186515, - "row_cuts": [ - 0, - 0.2800000000000001, - 0.43000000000000005, - 0.62, - 0.77, - 1 - ], - "col_cuts": [ - 0, - 0.2, - 0.4, - 0.5, - 0.7, - 0.8, - 1 - ], - "best_path": [ - 33, - 30, - 29, - 28, - 27, - 21, - 15, - 0, - 13, - 7, - 1, - 2, - 31, - 14, - 8, - 3, - 4, - 10, - 32, - 23, - 22, - 24, - 18, - 17, - 16, - 35, - 9, - 12, - 6, - 5, - 11, - 34, - 20, - 25, - 26, - 19, - 0 - ], - "timestamp": "2025-04-06 09:10:53" -} \ No newline at end of file diff --git a/solutions/dqn_params_50_50_3.json b/solutions/dqn_params_50_50_3.json deleted file mode 100644 index 128d989..0000000 --- a/solutions/dqn_params_50_50_3.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "best_time": 8820.015746422654, - "row_cuts": [ - 0, - 0.2900000000000001, - 0.4700000000000001, - 0.77, - 1 - ], - "col_cuts": [ - 0, - 0.5, - 1 - ], - "best_path": [ - 0, - 1, - 3, - 5, - 9, - 7, - 8, - 10, - 2, - 4, - 6, - 0 - ], - "timestamp": "2025-04-03 10:58:44" -} \ No newline at end of file diff --git a/visualization.py b/visualization.py index 859a878..8090f10 100644 --- a/visualization.py +++ b/visualization.py @@ -199,8 +199,8 @@ if __name__ == "__main__": # --------------------------- # 需要修改的超参数 # --------------------------- - params_file = 'params_100_100_6' - solution_file = r'solutions\finetune_params_100_100_6.json' + params_file = 'params_50_50_3' + solution_file = r'solutions\dqn_params_100_100_6.json' with open(params_file + '.yml', 'r', encoding='utf-8') as file: params = yaml.safe_load(file)