Adjust eval output
This commit is contained in:
parent
2362de4c54
commit
c96c36d4cd
@@ -72,11 +72,11 @@ def main():
    # Seed Everything
    env_seed = opt.seed
    torch.manual_seed(opt.seed)
    torch.cuda.manual_seed(opt.seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    print("Random Seed: {}".format(opt.seed))
    # torch.manual_seed(opt.seed)
    # torch.cuda.manual_seed(opt.seed)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False
    # print("Random Seed: {}".format(opt.seed))

    # Build SummaryWriter to record training curves
    if opt.write:

@@ -2,6 +2,7 @@ import torch.nn.functional as F
import torch.nn as nn
import argparse
import torch
import numpy as np

class Actor(nn.Module):
    def __init__(self, state_dim, action_dim, net_width, maxaction):

@@ -40,14 +41,16 @@ def evaluate_policy(env, agent, turns = 3):
    for j in range(turns):
        s = env.reset()
        done = False
        action_series = []
        while not done:
            # Take deterministic actions at test time
            a = agent.select_action(s, deterministic=True)
            s_next, r, dw, tr, info = env.step(a)
            done = (dw or tr)

            action_series.append(a[0])
            total_scores += r
            s = s_next
        print(np.round(action_series, 3))
    return int(total_scores/turns)

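Since the commit is about adjusting the eval output, the visible change at evaluation time is the per-episode action printout added above. A minimal sketch of what that printout produces, assuming actions come back as single-element arrays as in the loop above (the trace values here are made up):

import numpy as np

# Hypothetical action trace collected over one evaluation episode;
# a[0] in the loop above unwraps the single-element action array.
action_series = [0.1, 0.25, 0.437, 0.0, 0.1]
print(np.round(action_series, 3))  # e.g. [0.1   0.25  0.437 0.    0.1  ]
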
@@ -65,7 +65,7 @@ class DQN_agent(object):
            if state[0][0] == 0:
                a = np.random.randint(0, 10)
            else:
                a = np.random.randint(10, 13)
                a = np.random.randint(10, 14)
        else:
            if state[0][0] == 0:
                q_value = self.q_net(state)

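One detail worth noting about the widened range: numpy's randint excludes its upper bound, so the new draw still never returns index 14 (presumably the newly added "stay" action). A quick self-contained check, with an arbitrary sample size:

import numpy as np

# high is exclusive: randint(10, 14) only yields 10, 11, 12 or 13,
# i.e. the four move actions; index 14 is never drawn at random here.
samples = np.random.randint(10, 14, size=1000)
print(sorted(set(samples.tolist())))  # almost surely [10, 11, 12, 13]
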
@@ -3,6 +3,7 @@ import os
import shutil
import argparse
import torch
import numpy as np
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from env_dis import PartitionMazeEnv

@@ -111,19 +112,20 @@ def main():
        print('EnvName:', BriefEnvName[opt.EnvIdex],
              'seed:', opt.seed, 'score:', score)
    else:
        total_steps = 1
        total_steps = 0
        while total_steps < opt.Max_train_steps:
            # Do not use opt.seed directly, or it can overfit to opt.seed
            s = env.reset(seed=env_seed)
            env_seed += 1
            done = False

            '''Interact & train'''
            while not done:
                # e-greedy exploration
                if total_steps < opt.random_steps:
                    # TODO: the values drawn by sample() are problematic
                    a = env.action_space.sample()
                    if s[0] == 0:
                        a = np.random.randint(0, 10)
                    else:
                        a = np.random.randint(10, 14)
                else:
                    a = agent.select_action(s, deterministic=False)
                s_next, r, dw, tr, info = env.step(a)

env.py (57 changed lines)

@@ -39,8 +39,9 @@ class PartitionMazeEnv(gym.Env):
        ##############################
        # Hyperparameters that may need manual tuning
        ##############################
        self.CUT_NUM = 6  # half horizontal cuts, half vertical cuts
        self.BASE_LINE = 12000  # baseline time, computed via greedy or Monte Carlo
        self.CUT_NUM = 4  # half horizontal cuts, half vertical cuts
        self.BASE_LINE = 4000  # baseline time, computed via greedy or Monte Carlo
        self.MAX_STEPS = 200  # upper limit on the number of maze-walking steps

        self.phase = 0  # phase control: 0 = region partitioning, 1 = maze initialization, 2 = maze walking
        self.partition_step = 0  # step counter for the partitioning phase, range 0~4

@@ -65,7 +66,6 @@ class PartitionMazeEnv(gym.Env):
        self.init_maze_step = 0

        # Variables for the path-planning phase
        self.MAX_STEPS = 50  # upper limit on the number of maze-walking steps
        self.step_count = 0
        self.rectangles = {}
        self.car_pos = [(self.H / 2, self.W / 2) for _ in range(self.num_cars)]

@@ -159,40 +159,35 @@ class PartitionMazeEnv(gym.Env):
            else:
                # Enter phase 1: initialize the maze
                self.phase = 1
                state = np.concatenate(
                    [self.partition_values, np.array(self.car_pos).flatten()])
                reward = 10

                # Build a reverse index to simplify later lookups
                self.reverse_rectangles = {v['center']: k for k, v in self.rectangles.items()}

                region_centers = [
                    (i, j, self.rectangles[(i, j)]['center'])
                    for i in range(len(self.row_cuts) - 1)
                    for j in range(len(self.col_cuts) - 1)
                ]
                # Sort regions by distance to the area center, nearest first
                region_centers.sort(
                    key=lambda x: math.dist(x[2], (self.H / 2, self.W / 2))
                )

                # Assign the nearest region to each car
                for idx in range(self.num_cars):
                    i, j, center = region_centers[idx]
                    self.car_pos[idx] = center
                    self.car_traj[idx].append((i, j))
                    self.rectangles[(i, j)]['is_visited'] = True

                # Enter phase 2: walk the maze
                self.phase = 2
                state = np.concatenate(
                    [self.partition_values, np.array(self.car_pos).flatten()]
                )
                return state, reward, False, False, {}

        elif self.phase == 1:
            # Phase 1: initialize the maze; the cars start from the area center and head to the nearest region centers
            region_centers = [
                (i, j, self.rectangles[(i, j)]['center'])
                for i in range(len(self.row_cuts) - 1)
                for j in range(len(self.col_cuts) - 1)
            ]
            # Sort regions by distance to the area center, nearest first
            region_centers.sort(
                key=lambda x: math.dist(x[2], (self.H / 2, self.W / 2))
            )

            # Assign the nearest region to each car
            for idx in range(self.num_cars):
                i, j, center = region_centers[idx]
                self.car_pos[idx] = center
                self.car_traj[idx].append((i, j))
                self.rectangles[(i, j)]['is_visited'] = True

            # Enter phase 2: walk the maze
            self.phase = 2
            state = np.concatenate(
                [self.partition_values, np.array(self.car_pos).flatten()]
            )
            return state, 0.0, False, False, {}

        elif self.phase == 2:
            # Phase 2: path planning (maze walking)
            current_car = self.current_car_index

env_dis.py (66 changed lines)

@@ -47,9 +47,9 @@ class PartitionMazeEnv(gym.Env):
        self.partition_values = np.zeros(
            self.CUT_NUM, dtype=np.float32)  # stores c₁, c₂, r₁, r₂

        # Define the action space: a discrete space of size 14
        # The first 10 actions are cut values {0, 0.1, ..., 0.9}; the last 4 are up/down/left/right moves
        self.action_space = spaces.Discrete(14)
        # Define the action space: a discrete space of size 15
        # The first 10 actions are cut values {0, 0.1, ..., 0.9}; the last 5 are up/down/left/right moves plus staying put
        self.action_space = spaces.Discrete(15)

        # Define the observation space as an 8-dimensional vector
        # TODO: the returned state currently only contains position coordinates

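For reference, a minimal sketch of how the 15-way discrete action could be decoded, based only on the index layout described in the comments above (0-9 are cut values {0, 0.1, ..., 0.9}, 10-14 are up/down/left/right/stay). decode_action and the direction names are hypothetical helpers for illustration, not part of the repository:

# Hypothetical decoder for the Discrete(15) action space described above.
MOVE_NAMES = ["up", "down", "left", "right", "stay"]  # assumed ordering

def decode_action(a: int):
    """Map a discrete index to either a cut value or a move direction."""
    if not 0 <= a <= 14:
        raise ValueError("action index out of range for Discrete(15)")
    if a < 10:
        return ("cut", a / 10.0)           # 0 -> 0.0, ..., 9 -> 0.9
    return ("move", MOVE_NAMES[a - 10])    # 10 -> up, ..., 14 -> stay

print(decode_action(3))   # ('cut', 0.3)
print(decode_action(14))  # ('move', 'stay')
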
@@ -156,45 +156,40 @@ class PartitionMazeEnv(gym.Env):
                    [[self.phase], self.partition_values, np.zeros(np.array(self.car_pos).flatten().shape[0], dtype=np.float32)])
                return state, reward, True, False, {}
            else:
                # Enter phase 1: initialize the maze
                # Initialize the maze
                self.phase = 1
                state = np.concatenate(
                    [[self.phase], self.partition_values, np.array(self.car_pos).flatten()])
                reward = 10

                # Build a reverse index to simplify later lookups
                self.reverse_rectangles = {
                    v['center']: k for k, v in self.rectangles.items()}

                # Phase 1: initialize the maze; the cars start from the area center and head to the nearest region centers
                region_centers = [
                    (i, j, self.rectangles[(i, j)]['center'])
                    for i in range(len(self.row_cuts) - 1)
                    for j in range(len(self.col_cuts) - 1)
                ]
                # Sort regions by distance to the area center, nearest first
                region_centers.sort(
                    key=lambda x: math.dist(x[2], (self.H / 2, self.W / 2))
                )

                # Assign the nearest region to each car
                for idx in range(self.num_cars):
                    i, j, center = region_centers[idx]
                    self.car_pos[idx] = center
                    self.car_traj[idx].append((i, j))
                    self.rectangles[(i, j)]['is_visited'] = True

                # Enter phase 2: walk the maze
                self.phase = 2
                state = np.concatenate(
                    [[self.phase], self.partition_values,
                     np.array(self.car_pos).flatten()]
                )
                return state, reward, False, False, {}

        elif self.phase == 1:
            # TODO: phase 1 may not need to be written out explicitly!!!
            # Phase 1: initialize the maze; the cars start from the area center and head to the nearest region centers
            region_centers = [
                (i, j, self.rectangles[(i, j)]['center'])
                for i in range(len(self.row_cuts) - 1)
                for j in range(len(self.col_cuts) - 1)
            ]
            # Sort regions by distance to the area center, nearest first
            region_centers.sort(
                key=lambda x: math.dist(x[2], (self.H / 2, self.W / 2))
            )

            # Assign the nearest region to each car
            for idx in range(self.num_cars):
                i, j, center = region_centers[idx]
                self.car_pos[idx] = center
                self.car_traj[idx].append((i, j))
                self.rectangles[(i, j)]['is_visited'] = True

            # Enter phase 2: walk the maze
            self.phase = 2
            state = np.concatenate(
                [[self.phase], self.partition_values,
                 np.array(self.car_pos).flatten()]
            )
            return state, 0.0, False, False, {}

        elif self.phase == 2:
            # Phase 2: path planning (maze walking)
            # The last 4 actions correspond to up/down/left/right moves

@@ -212,6 +207,9 @@ class PartitionMazeEnv(gym.Env):
                new_col = current_col - 1
            elif action == 13 and current_col < len(self.col_cuts) - 2:  # right
                new_col = new_col + 1
            else:
                # Invalid action: stay in place
                pass

            # Update the car's position
            self.car_pos[current_car] = self.rectangles[(

@@ -1,12 +1,13 @@
# from env import PartitionMazeEnv
from env_dis import PartitionMazeEnv
from env import PartitionMazeEnv
# from env_dis import PartitionMazeEnv

env = PartitionMazeEnv()

state = env.reset()
print(state)

action_series = [0, 0, 3, 0, 0, 10]
action_series = [[0.1], [0.2], [0.4], [0], [0.1]]
# action_series = [0, 0, 3, 0, 0, 10]

for i in range(100):
    action = action_series[i]

params2.yml (new file, 16 lines)

@@ -0,0 +1,16 @@
H : 50  # area height; grid points are 25 m apart (one unit of distance)
W : 50  # area width
num_cars : 3  # number of systems (car-nest-drone systems)

# Time coefficients (unit: seconds; one photo per grid cell)
flight_time_factor : 3  # flight time per photo; the drone flies at 9.5 m/s and shoots one photo every 3 s
comp_time_factor : 5  # on-board computation time per photo, 5 s
trans_time_factor : 0.3  # transmission time per photo, 0.3 s
car_time_factor : 100  # TODO: car travel time per unit distance, 2 s, with an amplification factor of 50 applied
bs_time_factor : 5  # computation time per photo at the nest (base station)

# Other parameters
flight_energy_factor : 0.05  # unit: minutes per photo
comp_energy_factor : 0.05  # TODO: the computation energy needs to be re-estimated
trans_energy_factor : 0.0025
battery_energy_capacity : 20  # the drone only flies; endurance is 30 minutes

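A minimal sketch of consuming this new config, assuming PyYAML is installed and params2.yml sits in the working directory; the derived quantity at the end is only an illustration of how the factors combine, under the assumption that flight energy alone drains the battery:

import yaml

# Load the hyperparameters introduced in params2.yml.
with open("params2.yml", "r", encoding="utf-8") as f:
    params = yaml.safe_load(f)

print(params["num_cars"], params["flight_time_factor"])  # 3 3

# Rough illustrative check: with flight_energy_factor minutes of energy per
# photo and battery_energy_capacity minutes of battery, the drone could take
# roughly this many photos per charge.
photos_per_charge = params["battery_energy_capacity"] / params["flight_energy_factor"]
print(photos_per_charge)  # 400.0
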