diff --git a/DDPG_solver/main.py b/DDPG_solver/main.py
index ab28f69..452f4cf 100644
--- a/DDPG_solver/main.py
+++ b/DDPG_solver/main.py
@@ -72,11 +72,11 @@ def main():
 
     # Seed Everything
     env_seed = opt.seed
-    torch.manual_seed(opt.seed)
-    torch.cuda.manual_seed(opt.seed)
-    torch.backends.cudnn.deterministic = True
-    torch.backends.cudnn.benchmark = False
-    print("Random Seed: {}".format(opt.seed))
+    # torch.manual_seed(opt.seed)
+    # torch.cuda.manual_seed(opt.seed)
+    # torch.backends.cudnn.deterministic = True
+    # torch.backends.cudnn.benchmark = False
+    # print("Random Seed: {}".format(opt.seed))
 
     # Build SummaryWriter to record training curves
     if opt.write:
diff --git a/DDPG_solver/utils.py b/DDPG_solver/utils.py
index 7816b9e..2039c65 100644
--- a/DDPG_solver/utils.py
+++ b/DDPG_solver/utils.py
@@ -2,6 +2,7 @@ import torch.nn.functional as F
 import torch.nn as nn
 import argparse
 import torch
+import numpy as np
 
 
 class Actor(nn.Module):
     def __init__(self, state_dim, action_dim, net_width, maxaction):
@@ -40,14 +41,16 @@ def evaluate_policy(env, agent, turns = 3):
     for j in range(turns):
         s = env.reset()
         done = False
+        action_series = []
         while not done:
             # Take deterministic actions at test time
             a = agent.select_action(s, deterministic=True)
             s_next, r, dw, tr, info = env.step(a)
             done = (dw or tr)
-
+            action_series.append(a[0])
             total_scores += r
             s = s_next
+        print(np.round(action_series, 3))
 
     return int(total_scores/turns)
diff --git a/Duel_Double_DQN/DQN.py b/Duel_Double_DQN/DQN.py
index 8791edf..d03adf8 100644
--- a/Duel_Double_DQN/DQN.py
+++ b/Duel_Double_DQN/DQN.py
@@ -65,7 +65,7 @@ class DQN_agent(object):
             if state[0][0] == 0:
                 a = np.random.randint(0,10)
             else:
-                a = np.random.randint(10,13)
+                a = np.random.randint(10,14)
         else:
             if state[0][0] == 0:
                 q_value = self.q_net(state)
diff --git a/Duel_Double_DQN/main.py b/Duel_Double_DQN/main.py
index 6338b48..fff5c96 100644
--- a/Duel_Double_DQN/main.py
+++ b/Duel_Double_DQN/main.py
@@ -3,6 +3,7 @@ import os
 import shutil
 import argparse
 import torch
+import numpy as np
 import sys
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from env_dis import PartitionMazeEnv
@@ -111,19 +112,20 @@ def main():
         print('EnvName:', BriefEnvName[opt.EnvIdex], 'seed:', opt.seed,
               'score:', score)
     else:
-        total_steps = 1
+        total_steps = 0
         while total_steps < opt.Max_train_steps:
             # Do not use opt.seed directly, or it can overfit to opt.seed
             s = env.reset(seed=env_seed)
-            env_seed += 1
             done = False
 
             '''Interact & train'''
             while not done:
                 # e-greedy exploration
                 if total_steps < opt.random_steps:
-                    # TODO the sampled action range is wrong here
-                    a = env.action_space.sample()
+                    if s[0] == 0:
+                        a = np.random.randint(0, 10)
+                    else:
+                        a = np.random.randint(10, 14)
                 else:
                     a = agent.select_action(s, deterministic=False)
                 s_next, r, dw, tr, info = env.step(a)
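Note on the two hunks above: the phase-masked random sampling now exists in two copies, one in DQN.py and one in the warm-up branch of main.py. A shared helper would keep them from drifting apart. The sketch below is hypothetical (`sample_masked_action` is not part of this diff); it only mirrors the ranges used above, where actions 0-9 are cut actions and 10-13 are the moves, and numpy's exclusive upper bound is exactly the off-by-one the DQN.py hunk fixes (10,13 to 10,14).

```python
import numpy as np

def sample_masked_action(phase: int, rng: np.random.Generator) -> int:
    """Hypothetical helper: draw a random action valid for the phase.

    Phase 0 is the partition phase, so only the ten cut actions
    {0, 0.1, ..., 0.9} (indices 0-9) are legal; any other phase is
    maze walking, where the four moves (indices 10-13) are sampled.
    integers() excludes its upper bound, hence (10, 14).
    """
    if phase == 0:
        return int(rng.integers(0, 10))
    return int(rng.integers(10, 14))

# Usage: the phase flag is the first entry of the observation.
rng = np.random.default_rng(0)
s = [0, 0.0, 0.0]  # toy observation; s[0] is the phase flag
print(sample_masked_action(int(s[0]), rng))  # a cut action in 0..9
```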
diff --git a/env.py b/env.py
index 12c7966..540769c 100644
--- a/env.py
+++ b/env.py
@@ -39,8 +39,9 @@ class PartitionMazeEnv(gym.Env):
         ##############################
         # Hyperparameters that may need manual tuning
         ##############################
-        self.CUT_NUM = 6  # half horizontal cuts, half vertical cuts
-        self.BASE_LINE = 12000  # baseline time, computed via greedy or Monte Carlo
+        self.CUT_NUM = 4  # half horizontal cuts, half vertical cuts
+        self.BASE_LINE = 4000  # baseline time, computed via greedy or Monte Carlo
+        self.MAX_STEPS = 200  # upper bound on maze-walking steps
 
         self.phase = 0  # phase control: 0 = partition, 1 = maze init, 2 = maze walking
         self.partition_step = 0  # partition-phase step counter, range 0~4
@@ -65,7 +66,6 @@ class PartitionMazeEnv(gym.Env):
         self.init_maze_step = 0
 
         # Path-planning phase variables
-        self.MAX_STEPS = 50  # upper bound on maze-walking steps
         self.step_count = 0
         self.rectangles = {}
         self.car_pos = [(self.H / 2, self.W / 2) for _ in range(self.num_cars)]
@@ -159,40 +159,35 @@ class PartitionMazeEnv(gym.Env):
             else:
                 # Enter phase 1: initialize the maze
                 self.phase = 1
-                state = np.concatenate(
-                    [self.partition_values, np.array(self.car_pos).flatten()])
                 reward = 10
 
                 # Build a reverse index for later lookups
                 self.reverse_rectangles = {v['center']: k for k, v in self.rectangles.items()}
+
+                region_centers = [
+                    (i, j, self.rectangles[(i, j)]['center'])
+                    for i in range(len(self.row_cuts) - 1)
+                    for j in range(len(self.col_cuts) - 1)
+                ]
+                # Sort region centers by distance to the map center, nearest first
+                region_centers.sort(
+                    key=lambda x: math.dist(x[2], (self.H / 2, self.W / 2))
+                )
+
+                # Assign the nearest region to each car
+                for idx in range(self.num_cars):
+                    i, j, center = region_centers[idx]
+                    self.car_pos[idx] = center
+                    self.car_traj[idx].append((i, j))
+                    self.rectangles[(i, j)]['is_visited'] = True
+
+                # Enter phase 2: maze walking
+                self.phase = 2
+                state = np.concatenate(
+                    [self.partition_values, np.array(self.car_pos).flatten()]
+                )
                 return state, reward, False, False, {}
 
-        elif self.phase == 1:
-            # Phase 1: initialize the maze; send the cars from the map center to the nearest region centers
-            region_centers = [
-                (i, j, self.rectangles[(i, j)]['center'])
-                for i in range(len(self.row_cuts) - 1)
-                for j in range(len(self.col_cuts) - 1)
-            ]
-            # Sort region centers by distance to the map center, nearest first
-            region_centers.sort(
-                key=lambda x: math.dist(x[2], (self.H / 2, self.W / 2))
-            )
-
-            # Assign the nearest region to each car
-            for idx in range(self.num_cars):
-                i, j, center = region_centers[idx]
-                self.car_pos[idx] = center
-                self.car_traj[idx].append((i, j))
-                self.rectangles[(i, j)]['is_visited'] = True
-
-            # Enter phase 2: maze walking
-            self.phase = 2
-            state = np.concatenate(
-                [self.partition_values, np.array(self.car_pos).flatten()]
-            )
-            return state, 0.0, False, False, {}
-
         elif self.phase == 2:
             # Phase 2: path planning (maze walking)
             current_car = self.current_car_index
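Note on env.py (above) and env_dis.py (below): both fold the former phase 1 into the tail of phase 0. As soon as the last cut is accepted, every car is assigned its nearest region center and the environment jumps straight to phase 2, so the initialization no longer costs the agent an extra step. A self-contained sketch of that assignment step follows; the values of H, W, num_cars and the 2x2 `rectangles` grid are illustrative, not taken from the repo.

```python
import math

# Toy mirror of the region assignment that now runs at the end of phase 0.
H, W, num_cars = 50, 50, 3
rectangles = {
    (0, 0): {'center': (12.5, 12.5), 'is_visited': False},
    (0, 1): {'center': (12.5, 37.5), 'is_visited': False},
    (1, 0): {'center': (37.5, 12.5), 'is_visited': False},
    (1, 1): {'center': (37.5, 37.5), 'is_visited': False},
}

# Collect (row, col, center) triples and sort them by distance to the
# map center, nearest first -- the same key the diff uses.
region_centers = sorted(
    ((i, j, rect['center']) for (i, j), rect in rectangles.items()),
    key=lambda x: math.dist(x[2], (H / 2, W / 2)),
)

# Car idx claims the idx-th nearest region and marks it visited.
for idx in range(num_cars):
    i, j, center = region_centers[idx]
    rectangles[(i, j)]['is_visited'] = True
    print(f"car {idx} -> region ({i}, {j}) at {center}")
```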
diff --git a/env_dis.py b/env_dis.py
index f7d55f5..e96715f 100644
--- a/env_dis.py
+++ b/env_dis.py
@@ -47,9 +47,9 @@ class PartitionMazeEnv(gym.Env):
         self.partition_values = np.zeros(
             self.CUT_NUM, dtype=np.float32)  # stores c₁, c₂, r₁, r₂
 
-        # Action space: 14 discrete actions
-        # the first 10 are cut actions {0, 0.1, ..., 0.9}, the last 4 move the car up/down/left/right
-        self.action_space = spaces.Discrete(14)
+        # Action space: 15 discrete actions
+        # the first 10 are cut actions {0, 0.1, ..., 0.9}, the last 5 move the car up/down/left/right or keep it in place
+        self.action_space = spaces.Discrete(15)
 
         # Observation space: an 8-dimensional vector
         # TODO the returned state currently only contains position coordinates
@@ -156,45 +156,40 @@ class PartitionMazeEnv(gym.Env):
                     [[self.phase], self.partition_values, np.zeros(np.array(self.car_pos).flatten().shape[0], dtype=np.float32)])
                 return state, reward, True, False, {}
             else:
-                # Enter phase 1: initialize the maze
+                # Initialize the maze
                 self.phase = 1
-                state = np.concatenate(
-                    [[self.phase], self.partition_values, np.array(self.car_pos).flatten()])
                 reward = 10
 
                 # Build a reverse index for later lookups
                 self.reverse_rectangles = {
                     v['center']: k for k, v in self.rectangles.items()}
+
+                # Phase 1: initialize the maze; send the cars from the map center to the nearest region centers
+                region_centers = [
+                    (i, j, self.rectangles[(i, j)]['center'])
+                    for i in range(len(self.row_cuts) - 1)
+                    for j in range(len(self.col_cuts) - 1)
+                ]
+                # Sort region centers by distance to the map center, nearest first
+                region_centers.sort(
+                    key=lambda x: math.dist(x[2], (self.H / 2, self.W / 2))
+                )
+
+                # Assign the nearest region to each car
+                for idx in range(self.num_cars):
+                    i, j, center = region_centers[idx]
+                    self.car_pos[idx] = center
+                    self.car_traj[idx].append((i, j))
+                    self.rectangles[(i, j)]['is_visited'] = True
+
+                # Enter phase 2: maze walking
+                self.phase = 2
+                state = np.concatenate(
+                    [[self.phase], self.partition_values,
+                     np.array(self.car_pos).flatten()]
+                )
                 return state, reward, False, False, {}
 
-        elif self.phase == 1:
-            # TODO phase 1 need not be written as a separate step!!!
-            # Phase 1: initialize the maze; send the cars from the map center to the nearest region centers
-            region_centers = [
-                (i, j, self.rectangles[(i, j)]['center'])
-                for i in range(len(self.row_cuts) - 1)
-                for j in range(len(self.col_cuts) - 1)
-            ]
-            # Sort region centers by distance to the map center, nearest first
-            region_centers.sort(
-                key=lambda x: math.dist(x[2], (self.H / 2, self.W / 2))
-            )
-
-            # Assign the nearest region to each car
-            for idx in range(self.num_cars):
-                i, j, center = region_centers[idx]
-                self.car_pos[idx] = center
-                self.car_traj[idx].append((i, j))
-                self.rectangles[(i, j)]['is_visited'] = True
-
-            # Enter phase 2: maze walking
-            self.phase = 2
-            state = np.concatenate(
-                [[self.phase], self.partition_values,
-                 np.array(self.car_pos).flatten()]
-            )
-            return state, 0.0, False, False, {}
-
         elif self.phase == 2:
             # Phase 2: path planning (maze walking)
             # the last 4 actions correspond to up/down/left/right moves
@@ -212,6 +207,9 @@ class PartitionMazeEnv(gym.Env):
                 new_col = current_col - 1
             elif action == 13 and current_col < len(self.col_cuts) - 2:  # right
                 new_col = new_col + 1
+            else:
+                # Invalid action: stay in place
+                pass
 
             # Update car position
             self.car_pos[current_car] = self.rectangles[(
diff --git a/human_action.py b/human_action.py
index add8f4c..2275342 100644
--- a/human_action.py
+++ b/human_action.py
@@ -1,12 +1,13 @@
-# from env import PartitionMazeEnv
-from env_dis import PartitionMazeEnv
+from env import PartitionMazeEnv
+# from env_dis import PartitionMazeEnv
 
 env = PartitionMazeEnv()
 
 state = env.reset()
 print(state)
 
-action_series = [0, 0, 3, 0, 0, 10]
+action_series = [[0.1], [0.2], [0.4], [0], [0.1]]
+# action_series = [0, 0, 3, 0, 0, 10]
 
 for i in range(100):
     action = action_series[i]
diff --git a/params2.yml b/params2.yml
new file mode 100644
index 0000000..382ea74
--- /dev/null
+++ b/params2.yml
@@ -0,0 +1,16 @@
+H : 50 # area height; grid points are 25 m apart (one unit distance)
+W : 50 # area width
+num_cars : 3 # number of systems (car-nest-drone systems)
+
+# Time coefficients (unit: seconds; one photo per grid cell)
+flight_time_factor : 3 # flight time per photo; the drone flies at 9.5 m/s and shoots every 3 s
+comp_time_factor : 5 # on-drone computation time per photo, 5 s
+trans_time_factor : 0.3 # transmission time per photo, 0.3 s
+car_time_factor : 100 # TODO car travel time per unit distance, 2 s, with a 50x scaling factor applied
+bs_time_factor : 5 # computation time per photo at the nest
+
+# Other parameters
+flight_energy_factor : 0.05 # unit: minutes per photo
+comp_energy_factor : 0.05 # TODO computation energy needs re-estimation
+trans_energy_factor : 0.0025
+battery_energy_capacity : 20 # the drone only flies; endurance is 30 minutes
\ No newline at end of file
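Note on params2.yml: the file is new and nothing in this diff shows how it is consumed. A minimal loading sketch, assuming PyYAML is available and the keys are read as-is; the per-photo arithmetic only restates the comments in the file itself.

```python
import yaml  # assumption: PyYAML; the actual loading code is not in this diff

with open('params2.yml', encoding='utf-8') as f:
    params = yaml.safe_load(f)

# Per the file's comments, handling one photo costs flight (3 s)
# plus on-drone compute (5 s) plus transmission (0.3 s).
per_photo_time = (params['flight_time_factor']
                  + params['comp_time_factor']
                  + params['trans_time_factor'])
print(f"per-photo pipeline time: {per_photo_time} s")  # 3 + 5 + 0.3 = 8.3 s
```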