"""Train and visualize the first (area-partition) layer of a hierarchical DQN system."""

import matplotlib.pyplot as plt
import numpy as np

from dqn import Agent
# AllocationEnv / RoutingEnv are not referenced directly in this script.
# NOTE(review): presumably the second/third layers are driven from inside
# PartitionEnv.step(); the imports are kept in case they are needed for
# side effects — confirm before removing.
from env_allocation import AllocationEnv  # noqa: F401
from env_partition import PartitionEnv
from env_routing import RoutingEnv  # noqa: F401

# Final-step reward value for which no completion time is reported
# (the solution's 'total_time' is set to infinity instead).
_FAILURE_REWARD = -10000


def train_hierarchical(episodes=1000, max_steps=1000):
    """Train the first-layer (area partition) agent of the hierarchical system.

    Args:
        episodes: Number of training episodes to run (must be >= 1).
        max_steps: Upper bound on environment steps per episode (must be >= 1).

    Returns:
        A ``(best_solution, rewards_history)`` tuple. ``best_solution`` is a
        dict with keys ``'vertical_cuts'``, ``'horizontal_cuts'``,
        ``'total_time'`` and ``'episode'`` (0-based index of the episode that
        achieved the highest total reward). ``rewards_history`` is the list of
        per-episode total rewards.

    Raises:
        ValueError: If ``episodes`` or ``max_steps`` is smaller than 1. With
            zero steps there would be no action/reward to build a solution
            from.
    """
    if episodes < 1 or max_steps < 1:
        raise ValueError("episodes and max_steps must both be >= 1")

    # First-layer environment (area partition) and its agent.
    partition_env = PartitionEnv()
    partition_state_dim = partition_env.observation_space.shape[0]
    partition_action_dim = 10  # 5 vertical-cut options + 5 horizontal-cut options
    partition_agent = Agent(partition_state_dim, partition_action_dim)

    # Training bookkeeping.
    rewards_history = []
    best_reward = float('-inf')
    best_solution = None

    print("开始训练分层强化学习系统...")

    for episode in range(episodes):
        state = partition_env.reset()
        episode_reward = 0

        for _ in range(max_steps):
            action = partition_agent.choose_action(state)

            # Per the original author's note, stepping the partition env also
            # triggers the second- and third-layer optimisation.
            next_state, reward, done, _info = partition_env.step(action)

            partition_agent.store_transition(state, action, reward, next_state, done)
            partition_agent.learn()

            episode_reward += reward
            state = next_state

            if done:
                break

        rewards_history.append(episode_reward)

        # Keep the best episode so far. The stored cuts and time come from the
        # LAST action/reward of that episode.
        if episode_reward > best_reward:
            best_reward = episode_reward
            # NOTE(review): indexing `action` assumes choose_action() returns a
            # 2-element sequence (vertical, horizontal); if it returns a single
            # discrete index this will fail — confirm against dqn.Agent.
            best_solution = {
                'vertical_cuts': int(action[0]),
                'horizontal_cuts': int(action[1]),
                'total_time': -reward if reward != _FAILURE_REWARD else float('inf'),
                'episode': episode
            }

        # Report a 10-episode moving average every 10 episodes.
        if (episode + 1) % 10 == 0:
            avg_reward = np.mean(rewards_history[-10:])
            print(f"Episode {episode + 1}, Average Reward: {avg_reward:.2f}")

    return best_solution, rewards_history


def plot_training_results(rewards_history):
    """Plot the total reward obtained in each training episode.

    Args:
        rewards_history: Sequence of per-episode total rewards.
    """
    plt.figure(figsize=(10, 5))
    plt.plot(rewards_history)
    plt.title('Hierarchical DQN Training Progress')
    plt.xlabel('Episode')
    plt.ylabel('Total Reward')
    plt.grid(True)
    plt.show()


def print_solution(solution):
    """Print a human-readable summary of the best solution.

    Args:
        solution: Dict as returned by ``train_hierarchical`` with keys
            ``'episode'``, ``'vertical_cuts'``, ``'horizontal_cuts'`` and
            ``'total_time'``.
    """
    print("\n最佳解决方案:")
    print(f"在第 {solution['episode']} 轮找到")
    print(f"垂直切割数: {solution['vertical_cuts']}")
    print(f"水平切割数: {solution['horizontal_cuts']}")
    print(f"总完成时间: {solution['total_time']:.2f} 秒")


def visualize_partition(solution):
    """Visualize the area partition described by ``solution``.

    Draws evenly spaced grid lines over a 20 x 25 (height x width) area.
    ``'vertical_cuts'`` subdivides the height (drawn as horizontal lines) and
    ``'horizontal_cuts'`` subdivides the width (drawn as vertical lines).
    NOTE(review): confirm this axis mapping matches PartitionEnv's definition.

    A cut count below 1 is treated as 1, so only the outer border is drawn
    instead of dividing by zero.

    Args:
        solution: Dict with integer ``'vertical_cuts'`` and
            ``'horizontal_cuts'`` entries.
    """
    H, W = 20, 25
    # Clamp to at least one band per axis to avoid ZeroDivisionError.
    v_cuts = max(solution['vertical_cuts'], 1)
    h_cuts = max(solution['horizontal_cuts'], 1)

    plt.figure(figsize=(10, 8))

    # Grid lines, including both outer borders of each axis.
    for i in range(v_cuts + 1):
        y = i * (H / v_cuts)
        plt.axhline(y=y, color='b', linestyle='-', alpha=0.5)

    for i in range(h_cuts + 1):
        x = i * (W / h_cuts)
        plt.axvline(x=x, color='b', linestyle='-', alpha=0.5)

    plt.title('Area Partition Visualization')
    plt.xlabel('Width')
    plt.ylabel('Height')
    plt.grid(True, alpha=0.3)
    plt.show()


if __name__ == "__main__":
    # Train the model.
    best_solution, rewards_history = train_hierarchical()

    # Show the results.
    plot_training_results(rewards_history)
    print_solution(best_solution)
    visualize_partition(best_solution)