调整eval的输出
This commit is contained in:
parent
2362de4c54
commit
c96c36d4cd
@ -72,11 +72,11 @@ def main():
|
|||||||
|
|
||||||
# Seed Everything
|
# Seed Everything
|
||||||
env_seed = opt.seed
|
env_seed = opt.seed
|
||||||
torch.manual_seed(opt.seed)
|
# torch.manual_seed(opt.seed)
|
||||||
torch.cuda.manual_seed(opt.seed)
|
# torch.cuda.manual_seed(opt.seed)
|
||||||
torch.backends.cudnn.deterministic = True
|
# torch.backends.cudnn.deterministic = True
|
||||||
torch.backends.cudnn.benchmark = False
|
# torch.backends.cudnn.benchmark = False
|
||||||
print("Random Seed: {}".format(opt.seed))
|
# print("Random Seed: {}".format(opt.seed))
|
||||||
|
|
||||||
# Build SummaryWriter to record training curves
|
# Build SummaryWriter to record training curves
|
||||||
if opt.write:
|
if opt.write:
|
||||||
|
@ -2,6 +2,7 @@ import torch.nn.functional as F
|
|||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
import argparse
|
import argparse
|
||||||
import torch
|
import torch
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
class Actor(nn.Module):
|
class Actor(nn.Module):
|
||||||
def __init__(self, state_dim, action_dim, net_width, maxaction):
|
def __init__(self, state_dim, action_dim, net_width, maxaction):
|
||||||
@ -40,14 +41,16 @@ def evaluate_policy(env, agent, turns = 3):
|
|||||||
for j in range(turns):
|
for j in range(turns):
|
||||||
s = env.reset()
|
s = env.reset()
|
||||||
done = False
|
done = False
|
||||||
|
action_series = []
|
||||||
while not done:
|
while not done:
|
||||||
# Take deterministic actions at test time
|
# Take deterministic actions at test time
|
||||||
a = agent.select_action(s, deterministic=True)
|
a = agent.select_action(s, deterministic=True)
|
||||||
s_next, r, dw, tr, info = env.step(a)
|
s_next, r, dw, tr, info = env.step(a)
|
||||||
done = (dw or tr)
|
done = (dw or tr)
|
||||||
|
action_series.append(a[0])
|
||||||
total_scores += r
|
total_scores += r
|
||||||
s = s_next
|
s = s_next
|
||||||
|
print(np.round(action_series, 3))
|
||||||
return int(total_scores/turns)
|
return int(total_scores/turns)
|
||||||
|
|
||||||
|
|
||||||
|
@ -65,7 +65,7 @@ class DQN_agent(object):
|
|||||||
if state[0][0] == 0:
|
if state[0][0] == 0:
|
||||||
a = np.random.randint(0,10)
|
a = np.random.randint(0,10)
|
||||||
else:
|
else:
|
||||||
a = np.random.randint(10,13)
|
a = np.random.randint(10,14)
|
||||||
else:
|
else:
|
||||||
if state[0][0] == 0:
|
if state[0][0] == 0:
|
||||||
q_value = self.q_net(state)
|
q_value = self.q_net(state)
|
||||||
|
@ -3,6 +3,7 @@ import os
|
|||||||
import shutil
|
import shutil
|
||||||
import argparse
|
import argparse
|
||||||
import torch
|
import torch
|
||||||
|
import numpy as np
|
||||||
import sys
|
import sys
|
||||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
from env_dis import PartitionMazeEnv
|
from env_dis import PartitionMazeEnv
|
||||||
@ -111,19 +112,20 @@ def main():
|
|||||||
print('EnvName:', BriefEnvName[opt.EnvIdex],
|
print('EnvName:', BriefEnvName[opt.EnvIdex],
|
||||||
'seed:', opt.seed, 'score:', score)
|
'seed:', opt.seed, 'score:', score)
|
||||||
else:
|
else:
|
||||||
total_steps = 1
|
total_steps = 0
|
||||||
while total_steps < opt.Max_train_steps:
|
while total_steps < opt.Max_train_steps:
|
||||||
# Do not use opt.seed directly, or it can overfit to opt.seed
|
# Do not use opt.seed directly, or it can overfit to opt.seed
|
||||||
s = env.reset(seed=env_seed)
|
s = env.reset(seed=env_seed)
|
||||||
env_seed += 1
|
|
||||||
done = False
|
done = False
|
||||||
|
|
||||||
'''Interact & trian'''
|
'''Interact & trian'''
|
||||||
while not done:
|
while not done:
|
||||||
# e-greedy exploration
|
# e-greedy exploration
|
||||||
if total_steps < opt.random_steps:
|
if total_steps < opt.random_steps:
|
||||||
# TODO sample取值有问题
|
if s[0] == 0:
|
||||||
a = env.action_space.sample()
|
a = np.random.randint(0, 10)
|
||||||
|
else:
|
||||||
|
a = np.random.randint(10, 14)
|
||||||
else:
|
else:
|
||||||
a = agent.select_action(s, deterministic=False)
|
a = agent.select_action(s, deterministic=False)
|
||||||
s_next, r, dw, tr, info = env.step(a)
|
s_next, r, dw, tr, info = env.step(a)
|
||||||
|
13
env.py
13
env.py
@ -39,8 +39,9 @@ class PartitionMazeEnv(gym.Env):
|
|||||||
##############################
|
##############################
|
||||||
# 可能需要手动修改的超参数
|
# 可能需要手动修改的超参数
|
||||||
##############################
|
##############################
|
||||||
self.CUT_NUM = 6 # 横切一半,竖切一半
|
self.CUT_NUM = 4 # 横切一半,竖切一半
|
||||||
self.BASE_LINE = 12000 # 基准时间,通过greedy或者蒙特卡洛计算出来
|
self.BASE_LINE = 4000 # 基准时间,通过greedy或者蒙特卡洛计算出来
|
||||||
|
self.MAX_STEPS = 200 # 迷宫走法步数上限
|
||||||
|
|
||||||
self.phase = 0 # 阶段控制,0:区域划分阶段,1:迷宫初始化阶段,2:走迷宫阶段
|
self.phase = 0 # 阶段控制,0:区域划分阶段,1:迷宫初始化阶段,2:走迷宫阶段
|
||||||
self.partition_step = 0 # 区域划分阶段步数,范围 0~4
|
self.partition_step = 0 # 区域划分阶段步数,范围 0~4
|
||||||
@ -65,7 +66,6 @@ class PartitionMazeEnv(gym.Env):
|
|||||||
self.init_maze_step = 0
|
self.init_maze_step = 0
|
||||||
|
|
||||||
# 路径规划阶段相关变量
|
# 路径规划阶段相关变量
|
||||||
self.MAX_STEPS = 50 # 迷宫走法步数上限
|
|
||||||
self.step_count = 0
|
self.step_count = 0
|
||||||
self.rectangles = {}
|
self.rectangles = {}
|
||||||
self.car_pos = [(self.H / 2, self.W / 2) for _ in range(self.num_cars)]
|
self.car_pos = [(self.H / 2, self.W / 2) for _ in range(self.num_cars)]
|
||||||
@ -159,16 +159,11 @@ class PartitionMazeEnv(gym.Env):
|
|||||||
else:
|
else:
|
||||||
# 进入阶段 1:初始化迷宫
|
# 进入阶段 1:初始化迷宫
|
||||||
self.phase = 1
|
self.phase = 1
|
||||||
state = np.concatenate(
|
|
||||||
[self.partition_values, np.array(self.car_pos).flatten()])
|
|
||||||
reward = 10
|
reward = 10
|
||||||
|
|
||||||
# 构建反向索引,方便后续计算
|
# 构建反向索引,方便后续计算
|
||||||
self.reverse_rectangles = {v['center']: k for k, v in self.rectangles.items()}
|
self.reverse_rectangles = {v['center']: k for k, v in self.rectangles.items()}
|
||||||
return state, reward, False, False, {}
|
|
||||||
|
|
||||||
elif self.phase == 1:
|
|
||||||
# 阶段 1:初始化迷宫,让多个车辆从区域中心出发,前往最近的几个区域中心点
|
|
||||||
region_centers = [
|
region_centers = [
|
||||||
(i, j, self.rectangles[(i, j)]['center'])
|
(i, j, self.rectangles[(i, j)]['center'])
|
||||||
for i in range(len(self.row_cuts) - 1)
|
for i in range(len(self.row_cuts) - 1)
|
||||||
@ -191,7 +186,7 @@ class PartitionMazeEnv(gym.Env):
|
|||||||
state = np.concatenate(
|
state = np.concatenate(
|
||||||
[self.partition_values, np.array(self.car_pos).flatten()]
|
[self.partition_values, np.array(self.car_pos).flatten()]
|
||||||
)
|
)
|
||||||
return state, 0.0, False, False, {}
|
return state, reward, False, False, {}
|
||||||
|
|
||||||
elif self.phase == 2:
|
elif self.phase == 2:
|
||||||
# 阶段 2:路径规划(走迷宫)
|
# 阶段 2:路径规划(走迷宫)
|
||||||
|
18
env_dis.py
18
env_dis.py
@ -47,9 +47,9 @@ class PartitionMazeEnv(gym.Env):
|
|||||||
self.partition_values = np.zeros(
|
self.partition_values = np.zeros(
|
||||||
self.CUT_NUM, dtype=np.float32) # 存储 c₁, c₂, r₁, r₂
|
self.CUT_NUM, dtype=np.float32) # 存储 c₁, c₂, r₁, r₂
|
||||||
|
|
||||||
# 定义动作空间:长度为 14 的离散动作空间
|
# 定义动作空间:长度为 15 的离散动作空间
|
||||||
# 前 10 个表示切分动作 {0, 0.1, ..., 0.9},后 4 个表示上下左右移动
|
# 前 10 个表示切分动作 {0, 0.1, ..., 0.9},后 5 个表示上下左右移动和保持不动
|
||||||
self.action_space = spaces.Discrete(14)
|
self.action_space = spaces.Discrete(15)
|
||||||
|
|
||||||
# 定义观察空间为8维向量
|
# 定义观察空间为8维向量
|
||||||
# TODO 返回的状态目前只有位置坐标
|
# TODO 返回的状态目前只有位置坐标
|
||||||
@ -156,19 +156,14 @@ class PartitionMazeEnv(gym.Env):
|
|||||||
[[self.phase], self.partition_values, np.zeros(np.array(self.car_pos).flatten().shape[0], dtype=np.float32)])
|
[[self.phase], self.partition_values, np.zeros(np.array(self.car_pos).flatten().shape[0], dtype=np.float32)])
|
||||||
return state, reward, True, False, {}
|
return state, reward, True, False, {}
|
||||||
else:
|
else:
|
||||||
# 进入阶段 1:初始化迷宫
|
# 初始化迷宫
|
||||||
self.phase = 1
|
self.phase = 1
|
||||||
state = np.concatenate(
|
|
||||||
[[self.phase], self.partition_values, np.array(self.car_pos).flatten()])
|
|
||||||
reward = 10
|
reward = 10
|
||||||
|
|
||||||
# 构建反向索引,方便后续计算
|
# 构建反向索引,方便后续计算
|
||||||
self.reverse_rectangles = {
|
self.reverse_rectangles = {
|
||||||
v['center']: k for k, v in self.rectangles.items()}
|
v['center']: k for k, v in self.rectangles.items()}
|
||||||
return state, reward, False, False, {}
|
|
||||||
|
|
||||||
elif self.phase == 1:
|
|
||||||
# TODO 阶段一可以不写出来!!!
|
|
||||||
# 阶段 1:初始化迷宫,让多个车辆从区域中心出发,前往最近的几个区域中心点
|
# 阶段 1:初始化迷宫,让多个车辆从区域中心出发,前往最近的几个区域中心点
|
||||||
region_centers = [
|
region_centers = [
|
||||||
(i, j, self.rectangles[(i, j)]['center'])
|
(i, j, self.rectangles[(i, j)]['center'])
|
||||||
@ -193,7 +188,7 @@ class PartitionMazeEnv(gym.Env):
|
|||||||
[[self.phase], self.partition_values,
|
[[self.phase], self.partition_values,
|
||||||
np.array(self.car_pos).flatten()]
|
np.array(self.car_pos).flatten()]
|
||||||
)
|
)
|
||||||
return state, 0.0, False, False, {}
|
return state, reward, False, False, {}
|
||||||
|
|
||||||
elif self.phase == 2:
|
elif self.phase == 2:
|
||||||
# 阶段 2:路径规划(走迷宫)
|
# 阶段 2:路径规划(走迷宫)
|
||||||
@ -212,6 +207,9 @@ class PartitionMazeEnv(gym.Env):
|
|||||||
new_col = current_col - 1
|
new_col = current_col - 1
|
||||||
elif action == 13 and current_col < len(self.col_cuts) - 2: # 右
|
elif action == 13 and current_col < len(self.col_cuts) - 2: # 右
|
||||||
new_col = new_col + 1
|
new_col = new_col + 1
|
||||||
|
else:
|
||||||
|
# 无效动作,保持原地
|
||||||
|
pass
|
||||||
|
|
||||||
# 更新车辆位置
|
# 更新车辆位置
|
||||||
self.car_pos[current_car] = self.rectangles[(
|
self.car_pos[current_car] = self.rectangles[(
|
||||||
|
@ -1,12 +1,13 @@
|
|||||||
# from env import PartitionMazeEnv
|
from env import PartitionMazeEnv
|
||||||
from env_dis import PartitionMazeEnv
|
# from env_dis import PartitionMazeEnv
|
||||||
|
|
||||||
env = PartitionMazeEnv()
|
env = PartitionMazeEnv()
|
||||||
|
|
||||||
state = env.reset()
|
state = env.reset()
|
||||||
print(state)
|
print(state)
|
||||||
|
|
||||||
action_series = [0, 0, 3, 0, 0, 10]
|
action_series = [[0.1], [0.2], [0.4], [0], [0.1]]
|
||||||
|
# action_series = [0, 0, 3, 0, 0, 10]
|
||||||
|
|
||||||
for i in range(100):
|
for i in range(100):
|
||||||
action = action_series[i]
|
action = action_series[i]
|
||||||
|
16
params2.yml
Normal file
16
params2.yml
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
H : 50 # 区域高度,网格点之间的距离为25m(单位距离)
|
||||||
|
W : 50 # 区域宽度
|
||||||
|
num_cars : 3 # 系统数量(车-巢-机系统个数)
|
||||||
|
|
||||||
|
# 时间系数(单位:秒,每个网格一张照片)
|
||||||
|
flight_time_factor : 3 # 每张照片对应的飞行时间,无人机飞行速度为9.5m/s,拍摄照片的时间间隔为3s
|
||||||
|
comp_time_factor : 5 # 无人机上每张照片计算时间,5s
|
||||||
|
trans_time_factor : 0.3 # 每张照片传输时间,0.3s
|
||||||
|
car_time_factor : 100 # TODO 汽车每单位距离的移动时间,2s,加了一个放大因子50
|
||||||
|
bs_time_factor : 5 # 机巢上每张照片计算时间
|
||||||
|
|
||||||
|
# 其他参数
|
||||||
|
flight_energy_factor : 0.05 # 单位:分钟/张
|
||||||
|
comp_energy_factor : 0.05 # TODO 计算能耗需要重新估计
|
||||||
|
trans_energy_factor : 0.0025
|
||||||
|
battery_energy_capacity : 20 # 无人机只进行飞行,续航为30分钟
|
Loading…
Reference in New Issue
Block a user