HPCC2025/DQN/run_hierarchical.py
2025-03-09 16:53:01 +08:00

119 lines
3.6 KiB
Python

from env_partition import PartitionEnv
from env_allocation import AllocationEnv
from env_routing import RoutingEnv
from dqn import Agent
import numpy as np
import matplotlib.pyplot as plt
def train_hierarchical(episodes: int = 1000, max_steps: int = 1000):
    """Train the first-layer (partition) DQN agent of the hierarchical system.

    A ``PartitionEnv`` is created and a DQN ``Agent`` is trained on it.
    According to the original author's note, stepping the partition
    environment also triggers the second- and third-layer optimisation; that
    happens inside ``PartitionEnv.step`` and is not visible from here.

    Args:
        episodes: Number of training episodes to run. Defaults to 1000, the
            value that used to be hard-coded.
        max_steps: Maximum number of environment steps per episode. Defaults
            to 1000, the value that used to be hard-coded.

    Returns:
        A ``(best_solution, rewards_history)`` tuple. ``rewards_history`` is
        the list of total rewards, one entry per episode. ``best_solution``
        is a dict with keys ``'vertical_cuts'``, ``'horizontal_cuts'``,
        ``'total_time'`` and ``'episode'`` (0-based index) describing the
        episode with the highest total reward, or ``None`` when no episode
        was run.

    Raises:
        ValueError: If ``max_steps`` is smaller than 1. Without at least one
            step there is no action/reward to record as a solution.
    """
    if max_steps < 1:
        raise ValueError("max_steps must be at least 1")

    # Reward value treated as "no valid completion time" when recording a
    # solution. NOTE(review): presumably the penalty PartitionEnv returns for
    # an infeasible partition -- confirm against env_partition.
    failure_reward = -10000

    # First-layer environment (area partition).
    partition_env = PartitionEnv()
    partition_state_dim = partition_env.observation_space.shape[0]
    partition_action_dim = 10  # 5 vertical-cut options + 5 horizontal-cut options
    partition_agent = Agent(partition_state_dim, partition_action_dim)

    # Training bookkeeping.
    rewards_history = []
    best_reward = float('-inf')
    best_solution = None

    print("开始训练分层强化学习系统...")
    for episode in range(episodes):
        state = partition_env.reset()
        episode_reward = 0
        for step in range(max_steps):
            # Choose an action for the current state.
            action = partition_agent.choose_action(state)
            # Apply it (per the author, this drives the lower layers too).
            next_state, reward, done, _ = partition_env.step(action)
            # Store the transition, then run one learning update.
            partition_agent.store_transition(state, action, reward, next_state, done)
            partition_agent.learn()
            episode_reward += reward
            state = next_state
            if done:
                break

        # Record the total reward of this episode.
        rewards_history.append(episode_reward)

        # Keep the best episode seen so far. The recorded cuts and time come
        # from the LAST step of the episode.
        # NOTE(review): assumes the action is indexable as
        # (vertical, horizontal) -- confirm against Agent.choose_action.
        if episode_reward > best_reward:
            best_reward = episode_reward
            best_solution = {
                'vertical_cuts': int(action[0]),
                'horizontal_cuts': int(action[1]),
                'total_time': -reward if reward != failure_reward else float('inf'),
                'episode': episode
            }

        # Report the mean reward of the last (up to) 10 episodes.
        if (episode + 1) % 10 == 0:
            avg_reward = np.mean(rewards_history[-10:])
            print(f"Episode {episode + 1}, Average Reward: {avg_reward:.2f}")

    return best_solution, rewards_history
def plot_training_results(rewards_history):
    """Show a line chart of the total reward obtained in each episode.

    Args:
        rewards_history: Sequence of per-episode total rewards, in episode
            order; plotted against its index.
    """
    # One 10x5 inch figure with a single Axes.
    _, axes = plt.subplots(figsize=(10, 5))
    axes.plot(rewards_history)
    axes.set_title('Hierarchical DQN Training Progress')
    axes.set_xlabel('Episode')
    axes.set_ylabel('Total Reward')
    axes.grid(True)
    plt.show()
def print_solution(solution):
    """Print a human-readable summary of the best solution to stdout.

    Args:
        solution: Mapping with the keys ``'episode'``, ``'vertical_cuts'``,
            ``'horizontal_cuts'`` and ``'total_time'``. ``'total_time'`` is
            formatted with two decimals, so it must be a number.
    """
    print("\n最佳解决方案:")

    # Each value is looked up right before its line is printed.
    found_in = solution['episode']
    print(f"在第 {found_in} 轮找到")

    n_vertical = solution['vertical_cuts']
    print(f"垂直切割数: {n_vertical}")

    n_horizontal = solution['horizontal_cuts']
    print(f"水平切割数: {n_horizontal}")

    finish_time = solution['total_time']
    print(f"总完成时间: {finish_time:.2f}")
def visualize_partition(solution, height=20, width=25):
    """Draw the grid lines of the area partition described by *solution*.

    The height is split into ``solution['vertical_cuts']`` equal bands
    (drawn as horizontal lines) and the width into
    ``solution['horizontal_cuts']`` equal bands (drawn as vertical lines),
    including the outer boundary lines.

    NOTE(review): the 'vertical' count drives horizontal lines and vice
    versa -- confirm this matches the convention used by PartitionEnv.

    Args:
        solution: Mapping with integer ``'vertical_cuts'`` and
            ``'horizontal_cuts'`` entries.
        height: Extent of the area along the y axis. Defaults to 20, the
            value that used to be hard-coded.
        width: Extent of the area along the x axis. Defaults to 25, the
            value that used to be hard-coded.
    """
    # A count below 1 would divide by zero (or draw nothing); treat it as a
    # single undivided band so only the boundary lines are drawn.
    v_cuts = max(solution['vertical_cuts'], 1)
    h_cuts = max(solution['horizontal_cuts'], 1)

    plt.figure(figsize=(10, 8))

    # Grid lines: v_cuts + 1 horizontal and h_cuts + 1 vertical, evenly spaced.
    band_height = height / v_cuts
    for i in range(v_cuts + 1):
        plt.axhline(y=i * band_height, color='b', linestyle='-', alpha=0.5)
    band_width = width / h_cuts
    for i in range(h_cuts + 1):
        plt.axvline(x=i * band_width, color='b', linestyle='-', alpha=0.5)

    plt.title('Area Partition Visualization')
    plt.xlabel('Width')
    plt.ylabel('Height')
    plt.grid(True, alpha=0.3)
    plt.show()
def main():
    """Run training, then plot, print and visualise the best solution."""
    # Train the model.
    solution, history = train_hierarchical()
    # Present the results.
    plot_training_results(history)
    print_solution(solution)
    visualize_partition(solution)


if __name__ == "__main__":
    main()