From 34725a8edf5e7564419146bcbd3928728d8b549c Mon Sep 17 00:00:00 2001
From: weixin_46229132
Date: Thu, 6 Mar 2025 20:44:30 +0800
Subject: [PATCH] first commit

---
 dqn.py     | 131 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 env.py     | 134 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 run_dqn.py |  95 ++++++++++++++++++++++++++++++++++++
 3 files changed, 360 insertions(+)
 create mode 100644 dqn.py
 create mode 100644 env.py
 create mode 100644 run_dqn.py

diff --git a/dqn.py b/dqn.py
new file mode 100644
index 0000000..343481d
--- /dev/null
+++ b/dqn.py
@@ -0,0 +1,131 @@
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import numpy as np
+from collections import deque
+import random
+
+class DQN(nn.Module):
+    def __init__(self, state_dim, action_dim):
+        super(DQN, self).__init__()
+
+        self.network = nn.Sequential(
+            nn.Linear(state_dim, 128),
+            nn.ReLU(),
+            nn.Linear(128, 128),
+            nn.ReLU(),
+            nn.Linear(128, action_dim)
+        )
+
+    def forward(self, x):
+        return self.network(x)
+
+class Agent:
+    def __init__(self, state_dim, action_dim):
+        self.state_dim = state_dim
+        self.action_dim = action_dim
+
+        # Evaluation and target DQN networks
+        self.eval_net = DQN(state_dim, action_dim)
+        self.target_net = DQN(state_dim, action_dim)
+        self.target_net.load_state_dict(self.eval_net.state_dict())
+
+        # Training hyperparameters
+        self.learning_rate = 0.001
+        self.gamma = 0.99
+        self.epsilon = 1.0
+        self.epsilon_min = 0.01
+        self.epsilon_decay = 0.995
+        self.memory = deque(maxlen=10000)
+        self.batch_size = 64
+        self.optimizer = optim.Adam(self.eval_net.parameters(), lr=self.learning_rate)
+        self.learn_step_counter = 0  # counts learn() calls for periodic target-network updates
+
+        # Discretized action space
+        self.v_cuts_actions = [1, 2, 3, 4, 5]  # options for the number of vertical cuts
+        self.h_cuts_actions = [1, 2, 3, 4, 5]  # options for the number of horizontal cuts
+        # self.rho_actions = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]  # options for the offloading ratio
+
+    def discretize_action(self, q_values):
+        """Convert a Q-value vector into a discrete action."""
+        action = []
+
+        # Choose an action for each dimension separately
+        idx = 0
+        # Number of vertical cuts
+        v_cuts_q = q_values[idx:idx + len(self.v_cuts_actions)]
+        v_cuts_idx = torch.argmax(v_cuts_q).item()
+        action.append(self.v_cuts_actions[v_cuts_idx])
+        idx += len(self.v_cuts_actions)
+
+        # Number of horizontal cuts
+        h_cuts_q = q_values[idx:idx + len(self.h_cuts_actions)]
+        h_cuts_idx = torch.argmax(h_cuts_q).item()
+        action.append(self.h_cuts_actions[h_cuts_idx])
+        idx += len(self.h_cuts_actions)
+
+        # # Offloading ratio
+        # rho_q = q_values[idx:idx + len(self.rho_actions)]
+        # rho_idx = torch.argmax(rho_q).item()
+        # action.append(self.rho_actions[rho_idx])
+
+        return np.array(action)
+
+    def get_action_dim(self):
+        """Return the dimensionality of the discretized action space."""
+        return (len(self.v_cuts_actions) +
+                len(self.h_cuts_actions))
+        # len(self.rho_actions))
+
+    def choose_action(self, state):
+        if random.random() < self.epsilon:
+            # Explore: pick a random action
+            v_cuts = random.choice(self.v_cuts_actions)
+            h_cuts = random.choice(self.h_cuts_actions)
+            # rho = random.choice(self.rho_actions)
+            return np.array([v_cuts, h_cuts])
+        else:
+            # Exploit: pick the action with the highest Q-values
+            state = torch.FloatTensor(state).unsqueeze(0)
+            with torch.no_grad():
+                q_values = self.eval_net(state)
+            return self.discretize_action(q_values[0])
+
+    def store_transition(self, state, action, reward, next_state, done):
+        self.memory.append((state, action, reward, next_state, done))
+
+    def learn(self):
+        if len(self.memory) < self.batch_size:
+            return
+
+        # Sample a random mini-batch from the replay memory
+        batch = random.sample(self.memory, self.batch_size)
+        states = torch.FloatTensor(np.array([x[0] for x in batch]))
+        actions = torch.LongTensor(np.array([x[1] for x in batch]))
+        rewards = torch.FloatTensor([x[2] for x in batch])
+        next_states = torch.FloatTensor(np.array([x[3] for x in batch]))
+        dones = torch.FloatTensor([x[4] for x in batch])
+
+        # Current Q-values predicted by the evaluation network
+        current_q_values = self.eval_net(states)
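+        # Q-vector layout (mirroring discretize_action): the first
+        # len(self.v_cuts_actions) entries score the vertical-cut options and the
+        # next len(self.h_cuts_actions) entries score the horizontal-cut options.
+        # The update below treats the two dimensions as independent Q-heads over
+        # their own slices, which is one simple way to train this concatenated
+        # layout; a stored cut count c selects column c - 1 within its slice.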
+        # Q-values of the actions actually taken, one column per action dimension
+        n_v = len(self.v_cuts_actions)
+        v_idx = (actions[:, 0] - 1).unsqueeze(1)
+        h_idx = (actions[:, 1] - 1 + n_v).unsqueeze(1)
+        current_q = torch.cat([current_q_values.gather(1, v_idx),
+                               current_q_values.gather(1, h_idx)], dim=1)
+
+        # Target Q-values from the target network, computed per action dimension
+        next_q_values = self.target_net(next_states).detach()
+        max_next_q = torch.cat([next_q_values[:, :n_v].max(dim=1, keepdim=True)[0],
+                                next_q_values[:, n_v:].max(dim=1, keepdim=True)[0]], dim=1)
+        target_q_values = rewards.unsqueeze(1) + (1 - dones).unsqueeze(1) * self.gamma * max_next_q
+
+        # TD loss
+        loss = nn.MSELoss()(current_q, target_q_values)
+
+        # Update the evaluation network
+        self.optimizer.zero_grad()
+        loss.backward()
+        self.optimizer.step()
+
+        # Decay epsilon
+        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
+
+        # Periodically refresh the target network
+        self.learn_step_counter += 1
+        if self.learn_step_counter % 100 == 0:
+            self.target_net.load_state_dict(self.eval_net.state_dict())
diff --git a/env.py b/env.py
new file mode 100644
index 0000000..8c8ed6f
--- /dev/null
+++ b/env.py
@@ -0,0 +1,134 @@
+import numpy as np
+import gym
+from gym import spaces
+
+
+class Env(gym.Env):
+    """Area-coverage environment for the multi-vehicle, nest and UAV system."""
+
+    def __init__(self):
+        super(Env, self).__init__()
+
+        # Environment parameters
+        self.H = 20  # region height
+        self.W = 25  # region width
+        self.k = 1   # number of systems
+
+        # Time coefficients
+        self.flight_time_factor = 3  # flight time per photo
+        self.comp_uav_factor = 5  # UAV computation time
+        self.trans_time_factor = 0.3  # transmission time
+        self.car_move_time_factor = 100  # vehicle travel time
+        self.comp_bs_factor = 5  # nest (base station) computation time
+
+        # Energy parameters
+        self.flight_energy_factor = 0.05  # flight energy
+        self.comp_energy_factor = 0.05  # computation energy
+        self.trans_energy_factor = 0.0025  # transmission energy
+        self.battery_capacity = 30  # battery capacity
+
+        # Action space
+        # [number of vertical cuts, number of horizontal cuts, offloading ratio]
+        self.action_space = spaces.Box(
+            low=np.array([1, 1, 0]),
+            high=np.array([5, 5, 1]),
+            dtype=np.float32
+        )
+
+        # Observation space
+        # [current vertical cuts, current horizontal cuts, current max completion time]
+        self.observation_space = spaces.Box(
+            low=np.array([1, 1, 0]),
+            high=np.array([5, 5, float('inf')]),
+            dtype=np.float32
+        )
+
+        self.state = None
+        self.current_step = 0
+        self.max_steps = 1000
+
+    def step(self, action):
+        self.current_step += 1
+
+        # Parse the action
+        v_cuts = int(action[0])  # number of vertical cuts
+        h_cuts = int(action[1])  # number of horizontal cuts
+        # rho = action[2]  # offloading ratio
+
+        # TODO generate cut positions; for now the region is cut uniformly
+        v_boundaries = np.linspace(0, self.H, v_cuts + 1)
+        h_boundaries = np.linspace(0, self.W, h_cuts + 1)
+
+        # Evaluate the metrics of every sub-region
+        total_time = 0
+        valid_partition = True
+
+        for i in range(len(v_boundaries) - 1):
+            for j in range(len(h_boundaries) - 1):
+                # Sub-region size
+                height = v_boundaries[i+1] - v_boundaries[i]
+                width = h_boundaries[j+1] - h_boundaries[j]
+                area = height * width
+
+                # Solve for the offloading ratio rho:
+                # rho_time_limit keeps on-board computing plus transmission within the
+                # flight time, rho_energy_limit keeps total energy within the battery capacity.
+                rho_time_limit = (self.flight_time_factor - self.trans_time_factor) / \
+                    (self.comp_uav_factor - self.trans_time_factor)
+                rho_energy_limit = (self.battery_capacity - self.flight_energy_factor * area - self.trans_energy_factor * area) / \
+                    (self.comp_energy_factor * area - self.trans_energy_factor * area)
+                if rho_energy_limit < 0:
+                    valid_partition = False
+                    break
+                rho = min(rho_time_limit, rho_energy_limit)
+
+                # Time of each phase
+                flight_time = self.flight_time_factor * area
+                comp_time = self.comp_uav_factor * rho * area
+                trans_time = self.trans_time_factor * (1 - rho) * area
+                comp_bs_time = self.comp_bs_factor * (1 - rho) * area
+
+                # # Energy consumption
+                # flight_energy = self.flight_energy_factor * area
+                # comp_energy = self.comp_energy_factor * rho * area
+                # trans_energy = self.trans_energy_factor * (1 - rho) * area
+                # total_energy = flight_energy + comp_energy + trans_energy
+
+                # # Check the constraints
+                # if total_energy > self.battery_capacity or (comp_time + trans_time > flight_time):
+                #     valid_partition = False
+                #     break
+
+                # Distance from the sub-region centre to the region centre
+                center_y = (v_boundaries[i] + v_boundaries[i+1]) / 2
+                center_x = (h_boundaries[j] + h_boundaries[j+1]) / 2
+                dist_to_center = np.sqrt(
+                    (center_y - self.H/2)**2 + (center_x - self.W/2)**2)
+                car_time = dist_to_center * self.car_move_time_factor
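+                # Completion-time model: a sub-region is finished only when both the
+                # UAV leg (flight time plus the vehicle's travel time to the sub-region
+                # centre) and the nest-side computation are done, so the sub-task time
+                # is the larger of the two, and the region's total time is the makespan
+                # over all sub-tasks (e.g. max(30 + 12, 50) = 50).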
+                # Update the makespan
+                task_time = max(flight_time + car_time, comp_bs_time)
+                total_time = max(total_time, task_time)
+
+            if not valid_partition:
+                break
+
+        # Compute the reward
+        if not valid_partition:
+            reward = -10000  # penalize infeasible partitions
+            done = True
+        else:
+            reward = -total_time  # negative completion time as the reward
+            done = self.current_step >= self.max_steps
+
+        # Update the state
+        self.state = np.array([v_cuts, h_cuts, total_time])
+
+        return self.state, reward, done, {}
+
+    def reset(self):
+        # Initial state
+        self.state = np.array([1, 1, 0])
+        self.current_step = 0
+        return self.state
+
+    def render(self, mode='human'):
+        pass
diff --git a/run_dqn.py b/run_dqn.py
new file mode 100644
index 0000000..a123e41
--- /dev/null
+++ b/run_dqn.py
@@ -0,0 +1,95 @@
+from env import Env
+from dqn import Agent
+import numpy as np
+import matplotlib.pyplot as plt
+
+
+def train():
+    # Create the environment and the agent
+    env = Env()
+    state_dim = env.observation_space.shape[0]
+    action_dim = 10  # len(vertical cut options) + len(horizontal cut options)
+
+    agent = Agent(state_dim, action_dim)
+
+    # Training parameters
+    episodes = 1000
+    max_steps = 1000
+
+    # Training history
+    rewards_history = []
+    best_reward = float('-inf')
+    best_solution = None
+
+    # Training loop
+    for episode in range(episodes):
+        state = env.reset()
+        episode_reward = 0
+
+        for step in range(max_steps):
+            # Select an action
+            action = agent.choose_action(state)
+
+            # Apply the action
+            next_state, reward, done, _ = env.step(action)
+
+            # Store the transition
+            agent.store_transition(state, action, reward, next_state, done)
+
+            # Learn from replayed experience
+            agent.learn()
+
+            episode_reward += reward
+            state = next_state
+
+            if done:
+                break
+
+        # Record the total reward of this episode
+        rewards_history.append(episode_reward)
+
+        # Update the best solution found so far
+        if episode_reward > best_reward:
+            best_reward = episode_reward
+            best_solution = {
+                'vertical_cuts': int(action[0]),
+                'horizontal_cuts': int(action[1]),
+                # 'offload_ratio': action[2],
+                'total_time': -reward if reward != -10000 else float('inf'),
+                'episode': episode
+            }
+
+        # Print training progress
+        if (episode + 1) % 10 == 0:
+            avg_reward = np.mean(rewards_history[-10:])
+            print(f"Episode {episode + 1}, Average Reward: {avg_reward:.2f}")
+
+    return best_solution, rewards_history
+
+
+def plot_training_results(rewards_history):
+    plt.figure(figsize=(10, 5))
+    plt.plot(rewards_history)
+    plt.title('Training Progress')
+    plt.xlabel('Episode')
+    plt.ylabel('Total Reward')
+    plt.grid(True)
+    plt.show()
+
+
+def print_solution(solution):
+    print("\nBest solution:")
+    print(f"Found in episode {solution['episode']}")
+    print(f"Vertical cuts: {solution['vertical_cuts']}")
+    print(f"Horizontal cuts: {solution['horizontal_cuts']}")
+    # The offloading ratio is solved inside the environment and is no longer part
+    # of the solution dictionary, so it is not printed here.
+    # print(f"Offloading ratio: {solution['offload_ratio']:.2f}")
+    print(f"Total completion time: {solution['total_time']:.2f} seconds")
+
+
+if __name__ == "__main__":
+    # Train the model
+    best_solution, rewards_history = train()
+
+    # Show the results
+    plot_training_results(rewards_history)
+    print_solution(best_solution)
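+
+    # Illustrative only (a sketch, not part of the original script; the names
+    # check_env, best_action and check_reward exist only for this example):
+    # replay the best partition once in a fresh Env to double-check its
+    # completion time, since Env.step() returns the negative total time.
+    # check_env = Env()
+    # check_env.reset()
+    # best_action = np.array([best_solution['vertical_cuts'],
+    #                         best_solution['horizontal_cuts']])
+    # _, check_reward, _, _ = check_env.step(best_action)
+    # print(f"Re-evaluated completion time: {-check_reward:.2f} seconds")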