first commit
This commit is contained in:
parent 1be980dd0e
commit 34725a8edf

dqn.py (new file, 131 lines)
@@ -0,0 +1,131 @@
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from collections import deque
import random


class DQN(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(DQN, self).__init__()

        self.network = nn.Sequential(
            nn.Linear(state_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, action_dim)
        )

    def forward(self, x):
        return self.network(x)


class Agent:
    def __init__(self, state_dim, action_dim):
        self.state_dim = state_dim
        self.action_dim = action_dim

        # DQN networks
        self.eval_net = DQN(state_dim, action_dim)
        self.target_net = DQN(state_dim, action_dim)
        self.target_net.load_state_dict(self.eval_net.state_dict())

        # Training hyperparameters
        self.learning_rate = 0.001
        self.gamma = 0.99
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.memory = deque(maxlen=10000)
        self.batch_size = 64
        self.learn_step_counter = 0  # counts learn() calls for periodic target-network updates
        self.optimizer = optim.Adam(self.eval_net.parameters(), lr=self.learning_rate)

        # Discretized action space
        self.v_cuts_actions = [1, 2, 3, 4, 5]  # options for the number of vertical cuts
        self.h_cuts_actions = [1, 2, 3, 4, 5]  # options for the number of horizontal cuts
        # self.rho_actions = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]  # options for the offload ratio

    def discretize_action(self, q_values):
        """Convert a Q-value vector into a discrete action."""
        action = []

        # Choose an action for each dimension in turn
        idx = 0
        # Number of vertical cuts
        v_cuts_q = q_values[idx:idx + len(self.v_cuts_actions)]
        v_cuts_idx = torch.argmax(v_cuts_q).item()
        action.append(self.v_cuts_actions[v_cuts_idx])
        idx += len(self.v_cuts_actions)

        # Number of horizontal cuts
        h_cuts_q = q_values[idx:idx + len(self.h_cuts_actions)]
        h_cuts_idx = torch.argmax(h_cuts_q).item()
        action.append(self.h_cuts_actions[h_cuts_idx])
        idx += len(self.h_cuts_actions)

        # # Offload ratio
        # rho_q = q_values[idx:idx + len(self.rho_actions)]
        # rho_idx = torch.argmax(rho_q).item()
        # action.append(self.rho_actions[rho_idx])

        return np.array(action)

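    # Illustrative example (not part of the original commit): with the defaults
    # above, q_values has 10 entries. If q_values[0:5] peaks at index 2 and
    # q_values[5:10] peaks at index 4, discretize_action returns
    # np.array([3, 5]), i.e. 3 vertical cuts and 5 horizontal cuts.
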
    def get_action_dim(self):
        """Return the dimensionality of the discretized action space."""
        return (len(self.v_cuts_actions) +
                len(self.h_cuts_actions))
        # + len(self.rho_actions))

    def choose_action(self, state):
        if random.random() < self.epsilon:
            # Explore: pick a random action
            v_cuts = random.choice(self.v_cuts_actions)
            h_cuts = random.choice(self.h_cuts_actions)
            # rho = random.choice(self.rho_actions)
            return np.array([v_cuts, h_cuts])
        else:
            # Exploit: choose the action with the highest Q-values
            state = torch.FloatTensor(state).unsqueeze(0)
            with torch.no_grad():
                q_values = self.eval_net(state)
            return self.discretize_action(q_values[0])

    def store_transition(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def learn(self):
        if len(self.memory) < self.batch_size:
            return

        # Sample a random minibatch from the replay buffer
        batch = random.sample(self.memory, self.batch_size)
        states = torch.FloatTensor(np.array([x[0] for x in batch]))
        actions = torch.FloatTensor(np.array([x[1] for x in batch]))
        rewards = torch.FloatTensor([x[2] for x in batch])
        next_states = torch.FloatTensor(np.array([x[3] for x in batch]))
        dones = torch.FloatTensor([x[4] for x in batch])

        # Current Q-values of the actions actually taken, one value per action
        # head. The Q-vector layout follows discretize_action: the first
        # len(v_cuts_actions) entries form the vertical-cut head, the next
        # len(h_cuts_actions) entries the horizontal-cut head. Cut counts map
        # to indices as value - 1 because both option lists are [1, 2, 3, 4, 5].
        n_v = len(self.v_cuts_actions)
        v_idx = (actions[:, 0] - 1).long()
        h_idx = (actions[:, 1] - 1).long() + n_v
        q_all = self.eval_net(states)
        current_q_values = torch.stack([
            q_all.gather(1, v_idx.unsqueeze(1)).squeeze(1),
            q_all.gather(1, h_idx.unsqueeze(1)).squeeze(1),
        ], dim=1)

        # Target Q-values: per-head maximum over the target network's outputs
        next_q_values = self.target_net(next_states).detach()
        max_next_q = torch.stack([
            next_q_values[:, :n_v].max(dim=1)[0],
            next_q_values[:, n_v:].max(dim=1)[0],
        ], dim=1)
        target_q_values = rewards.unsqueeze(1) + (1 - dones).unsqueeze(1) * self.gamma * max_next_q

        # TD loss and network update
        loss = nn.MSELoss()(current_q_values, target_q_values)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # Decay epsilon
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

        # Periodically sync the target network with the evaluation network
        self.learn_step_counter += 1
        if self.learn_step_counter % 100 == 0:
            self.target_net.load_state_dict(self.eval_net.state_dict())
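
# Minimal smoke test (illustrative only, not part of the original commit). It
# assumes a 3-dimensional state matching env.py's observation
# [v_cuts, h_cuts, total_time] and the 10-dimensional discretized action space
# used above.
if __name__ == "__main__":
    agent = Agent(state_dim=3, action_dim=10)
    dummy_state = np.array([1, 1, 0], dtype=np.float32)
    chosen = agent.choose_action(dummy_state)
    agent.store_transition(dummy_state, chosen, -1.0, dummy_state, False)
    agent.learn()  # returns immediately until the buffer holds batch_size transitions
    print("chosen action:", chosen)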
env.py (new file, 134 lines)
@@ -0,0 +1,134 @@
import numpy as np
import gym
from gym import spaces


class Env(gym.Env):
    """Area-coverage environment for the multi car-nest-UAV system."""

    def __init__(self):
        super(Env, self).__init__()

        # Environment parameters
        self.H = 20  # region height
        self.W = 25  # region width
        self.k = 1   # number of systems

        # Time coefficients
        self.flight_time_factor = 3        # flight time per photo
        self.comp_uav_factor = 5           # UAV computation time
        self.trans_time_factor = 0.3       # transmission time
        self.car_move_time_factor = 100    # car movement time
        self.comp_bs_factor = 5            # nest (base station) computation time

        # Energy parameters
        self.flight_energy_factor = 0.05   # flight energy consumption
        self.comp_energy_factor = 0.05     # computation energy consumption
        self.trans_energy_factor = 0.0025  # transmission energy consumption
        self.battery_capacity = 30         # battery capacity

        # Action space
        # [number of vertical cuts, number of horizontal cuts, offload ratio]
        self.action_space = spaces.Box(
            low=np.array([1, 1, 0]),
            high=np.array([5, 5, 1]),
            dtype=np.float32
        )

        # Observation space
        # [current vertical cuts, current horizontal cuts, current maximum completion time]
        self.observation_space = spaces.Box(
            low=np.array([1, 1, 0]),
            high=np.array([5, 5, float('inf')]),
            dtype=np.float32
        )

        self.state = None
        self.current_step = 0
        self.max_steps = 1000

    def step(self, action):
        self.current_step += 1

        # Parse the action
        v_cuts = int(action[0])  # number of vertical cuts
        h_cuts = int(action[1])  # number of horizontal cuts
        # rho = action[2]  # offload ratio

        # TODO: generate cut positions; currently the region is cut uniformly
        v_boundaries = np.linspace(0, self.H, v_cuts + 1)
        h_boundaries = np.linspace(0, self.W, h_cuts + 1)

        # Evaluate each sub-region
        total_time = 0
        valid_partition = True

        for i in range(len(v_boundaries) - 1):
            for j in range(len(h_boundaries) - 1):
                # Sub-region size
                height = v_boundaries[i+1] - v_boundaries[i]
                width = h_boundaries[j+1] - h_boundaries[j]
                area = height * width

                # Solve for rho
                rho_time_limit = (self.flight_time_factor - self.trans_time_factor) / \
                                 (self.comp_uav_factor - self.trans_time_factor)
                rho_energy_limit = (self.battery_capacity - self.flight_energy_factor * area - self.trans_energy_factor * area) / \
                                   (self.comp_energy_factor * area - self.trans_energy_factor * area)
                if rho_energy_limit < 0:
                    valid_partition = False
                    break
                rho = min(rho_time_limit, rho_energy_limit)
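                # Sketch of where these limits come from (assuming rho is the
                # fraction of the workload computed on board the UAV, matching
                # the commented-out constraint check below):
                #   time:   comp_uav_factor*rho*area + trans_time_factor*(1-rho)*area
                #             <= flight_time_factor*area
                #           => rho <= (flight_time_factor - trans_time_factor)
                #                     / (comp_uav_factor - trans_time_factor)
                #   energy: flight_energy_factor*area + comp_energy_factor*rho*area
                #             + trans_energy_factor*(1-rho)*area <= battery_capacity
                #           => rho <= (battery_capacity - flight_energy_factor*area - trans_energy_factor*area)
                #                     / (comp_energy_factor*area - trans_energy_factor*area)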
                # Time of each stage
                flight_time = self.flight_time_factor * area
                comp_time = self.comp_uav_factor * rho * area
                trans_time = self.trans_time_factor * (1 - rho) * area
                comp_bs_time = self.comp_bs_factor * (1 - rho) * area

                # # Energy consumption
                # flight_energy = self.flight_energy_factor * area
                # comp_energy = self.comp_energy_factor * rho * area
                # trans_energy = self.trans_energy_factor * (1 - rho) * area
                # total_energy = flight_energy + comp_energy + trans_energy

                # # Constraint check
                # if total_energy > self.battery_capacity or (comp_time + trans_time > flight_time):
                #     valid_partition = False
                #     break

                # Distance from the sub-region center to the region center
                center_y = (v_boundaries[i] + v_boundaries[i+1]) / 2
                center_x = (h_boundaries[j] + h_boundaries[j+1]) / 2
                dist_to_center = np.sqrt(
                    (center_y - self.H/2)**2 + (center_x - self.W/2)**2)
                car_time = dist_to_center * self.car_move_time_factor

                # Update the overall completion time
                task_time = max(flight_time + car_time, comp_bs_time)
                total_time = max(total_time, task_time)

            if not valid_partition:
                break

        # Compute the reward
        if not valid_partition:
            reward = -10000  # penalize invalid partitions
            done = True
        else:
            reward = -total_time  # negative completion time as the reward
            done = self.current_step >= self.max_steps

        # Update the state
        self.state = np.array([v_cuts, h_cuts, total_time])

        return self.state, reward, done, {}

    def reset(self):
        # Initialize the state
        self.state = np.array([1, 1, 0])
        self.current_step = 0
        return self.state

    def render(self, mode='human'):
        pass
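
# Minimal smoke test (illustrative only, not part of the original commit):
# run a single environment step with a hand-picked action [v_cuts, h_cuts].
if __name__ == "__main__":
    env = Env()
    state = env.reset()
    next_state, reward, done, _ = env.step(np.array([2, 3]))
    print("state:", next_state, "reward:", reward, "done:", done)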
run_dqn.py (new file, 95 lines)
@@ -0,0 +1,95 @@
from env import Env
from dqn import Agent
import numpy as np
import matplotlib.pyplot as plt


def train():
    # Create the environment and the agent
    env = Env()
    state_dim = env.observation_space.shape[0]
    action_dim = 10  # len(v_cuts_actions) + len(h_cuts_actions)

    agent = Agent(state_dim, action_dim)

    # Training parameters
    episodes = 1000
    max_steps = 1000

    # Training records
    rewards_history = []
    best_reward = float('-inf')
    best_solution = None

    # Start training
    for episode in range(episodes):
        state = env.reset()
        episode_reward = 0

        for step in range(max_steps):
            # Choose an action
            action = agent.choose_action(state)

            # Execute the action
            next_state, reward, done, _ = env.step(action)

            # Store the experience
            agent.store_transition(state, action, reward, next_state, done)

            # Learn
            agent.learn()

            episode_reward += reward
            state = next_state

            if done:
                break

        # Record the total reward of this episode
        rewards_history.append(episode_reward)

        # Update the best solution (uses the last action of the episode)
        if episode_reward > best_reward:
            best_reward = episode_reward
            best_solution = {
                'vertical_cuts': int(action[0]),
                'horizontal_cuts': int(action[1]),
                # 'offload_ratio': action[2],
                'total_time': -reward if reward != -10000 else float('inf'),
                'episode': episode
            }

        # Print training progress
        if (episode + 1) % 10 == 0:
            avg_reward = np.mean(rewards_history[-10:])
            print(f"Episode {episode + 1}, Average Reward: {avg_reward:.2f}")

    return best_solution, rewards_history


def plot_training_results(rewards_history):
    plt.figure(figsize=(10, 5))
    plt.plot(rewards_history)
    plt.title('Training Progress')
    plt.xlabel('Episode')
    plt.ylabel('Total Reward')
    plt.grid(True)
    plt.show()


def print_solution(solution):
    print("\nBest solution:")
    print(f"Found in episode {solution['episode']}")
    print(f"Vertical cuts: {solution['vertical_cuts']}")
    print(f"Horizontal cuts: {solution['horizontal_cuts']}")
    # 'offload_ratio' is not stored while the offload action is disabled
    # print(f"Offload ratio: {solution['offload_ratio']:.2f}")
    print(f"Total completion time: {solution['total_time']:.2f} s")


if __name__ == "__main__":
    # Train the model
    best_solution, rewards_history = train()

    # Show the results
    plot_training_results(rewards_history)
    print_solution(best_solution)