# Listing metadata: 119 lines, 3.6 KiB, Python
from env_partition import PartitionEnv
|
|
from env_allocation import AllocationEnv
|
|
from env_routing import RoutingEnv
|
|
from dqn import Agent
|
|
import numpy as np
|
|
import matplotlib.pyplot as plt
|
|
|
|
def train_hierarchical(episodes=1000, max_steps=1000):
    """Train the first-layer (partition) agent of the hierarchical RL system.

    An ``Agent`` from the ``dqn`` module is trained against ``PartitionEnv``.
    According to the original author's comment, stepping the partition
    environment also triggers the second- and third-layer optimisation.

    Args:
        episodes: Number of training episodes to run.
        max_steps: Upper bound on environment steps per episode; must be at
            least 1.

    Returns:
        A ``(best_solution, rewards_history)`` tuple. ``best_solution`` is a
        dict with the keys ``'vertical_cuts'``, ``'horizontal_cuts'``,
        ``'total_time'`` and ``'episode'`` taken from the episode with the
        highest total reward, or ``None`` when no episode beat the initial
        ``-inf`` (for example when ``episodes`` is 0). ``rewards_history`` is
        the list of per-episode total rewards.

    Raises:
        ValueError: If ``max_steps`` is smaller than 1.
    """
    if max_steps < 1:
        # Without at least one step no action/reward exists to record as a
        # solution, so fail early with a clear message.
        raise ValueError("max_steps must be at least 1")

    # NOTE(review): presumably the penalty reward PartitionEnv returns for an
    # infeasible partition -- confirm against the environment.
    infeasible_reward = -10000

    # First-layer environment (area partitioning).
    partition_env = PartitionEnv()
    partition_state_dim = partition_env.observation_space.shape[0]
    partition_action_dim = 10  # 5 vertical-cut options + 5 horizontal-cut options

    partition_agent = Agent(partition_state_dim, partition_action_dim)

    # Training bookkeeping.
    rewards_history = []
    best_reward = float('-inf')
    best_solution = None

    print("开始训练分层强化学习系统...")

    for episode in range(episodes):
        state = partition_env.reset()
        episode_reward = 0

        for _step in range(max_steps):
            action = partition_agent.choose_action(state)

            # Per the original comment, this step also drives the second and
            # third layers of the hierarchy.
            next_state, reward, done, _info = partition_env.step(action)

            partition_agent.store_transition(state, action, reward, next_state, done)
            partition_agent.learn()

            episode_reward += reward
            state = next_state

            if done:
                break

        rewards_history.append(episode_reward)

        # Keep the best episode seen so far.
        # NOTE(review): this indexes `action` as a (vertical, horizontal) pair
        # and derives `total_time` from the reward of the episode's final
        # step -- confirm that choose_action returns an indexable action and
        # that the final-step reward is the intended quantity.
        if episode_reward > best_reward:
            best_reward = episode_reward
            best_solution = {
                'vertical_cuts': int(action[0]),
                'horizontal_cuts': int(action[1]),
                'total_time': -reward if reward != infeasible_reward else float('inf'),
                'episode': episode,
            }

        # Report progress every 10 episodes, averaged over the last 10.
        if (episode + 1) % 10 == 0:
            avg_reward = np.mean(rewards_history[-10:])
            print(f"Episode {episode + 1}, Average Reward: {avg_reward:.2f}")

    return best_solution, rewards_history
|
|
|
|
def plot_training_results(rewards_history):
    """Show a line plot of the per-episode total rewards.

    Args:
        rewards_history: Sequence of total rewards, one entry per episode.
    """
    plt.figure(figsize=(10, 5))
    plt.plot(rewards_history)

    # Title and axis labels, applied in the same order as listed.
    text_settings = (
        (plt.title, 'Hierarchical DQN Training Progress'),
        (plt.xlabel, 'Episode'),
        (plt.ylabel, 'Total Reward'),
    )
    for apply_text, text in text_settings:
        apply_text(text)

    plt.grid(True)
    plt.show()
|
|
|
|
def print_solution(solution):
    """Print a summary of the best solution to standard output.

    Args:
        solution: Mapping with the keys ``'episode'``, ``'vertical_cuts'``,
            ``'horizontal_cuts'`` and ``'total_time'``; ``'total_time'`` is
            formatted with two decimals.
    """
    report_lines = (
        "\n最佳解决方案:",
        f"在第 {solution['episode']} 轮找到",
        f"垂直切割数: {solution['vertical_cuts']}",
        f"水平切割数: {solution['horizontal_cuts']}",
        f"总完成时间: {solution['total_time']:.2f} 秒",
    )
    for line in report_lines:
        print(line)
|
|
|
|
def visualize_partition(solution, height=20, width=25):
    """Plot the grid lines of the area partition described by *solution*.

    Args:
        solution: Mapping providing ``'vertical_cuts'`` and
            ``'horizontal_cuts'``.
        height: Extent of the area along the y axis (default 20).
        width: Extent of the area along the x axis (default 25).
    """
    # The line spacing is `extent / count`, so a count of 0 would raise
    # ZeroDivisionError. Clamp to 1: an undivided area whose two outer
    # borders are still drawn.
    v_cuts = max(1, solution['vertical_cuts'])
    h_cuts = max(1, solution['horizontal_cuts'])

    plt.figure(figsize=(10, 8))

    # NOTE(review): 'vertical_cuts' divides the height and is drawn as
    # horizontal lines, while 'horizontal_cuts' divides the width and is drawn
    # as vertical lines -- confirm this matches PartitionEnv's convention.
    for i in range(v_cuts + 1):
        y = i * (height / v_cuts)
        plt.axhline(y=y, color='b', linestyle='-', alpha=0.5)

    for i in range(h_cuts + 1):
        x = i * (width / h_cuts)
        plt.axvline(x=x, color='b', linestyle='-', alpha=0.5)

    plt.title('Area Partition Visualization')
    plt.xlabel('Width')
    plt.ylabel('Height')
    plt.grid(True, alpha=0.3)
    plt.show()
|
|
|
|
if __name__ == "__main__":
    # Train the model, then show the reward curve, the textual summary of the
    # best solution, and the partition plot -- in that order.
    solution, history = train_hierarchical()

    plot_training_results(history)
    print_solution(solution)
    visualize_partition(solution)
|