调整eval的输出

This commit is contained in:
weixin_46229132 2025-03-19 10:58:43 +08:00
parent 2362de4c54
commit c96c36d4cd
8 changed files with 94 additions and 79 deletions

View File

@ -72,11 +72,11 @@ def main():
# Seed Everything
env_seed = opt.seed
torch.manual_seed(opt.seed)
torch.cuda.manual_seed(opt.seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
print("Random Seed: {}".format(opt.seed))
# torch.manual_seed(opt.seed)
# torch.cuda.manual_seed(opt.seed)
# torch.backends.cudnn.deterministic = True
# torch.backends.cudnn.benchmark = False
# print("Random Seed: {}".format(opt.seed))
# Build SummaryWriter to record training curves
if opt.write:

View File

@ -2,6 +2,7 @@ import torch.nn.functional as F
import torch.nn as nn
import argparse
import torch
import numpy as np
class Actor(nn.Module):
def __init__(self, state_dim, action_dim, net_width, maxaction):
@ -40,14 +41,16 @@ def evaluate_policy(env, agent, turns = 3):
for j in range(turns):
s = env.reset()
done = False
action_series = []
while not done:
# Take deterministic actions at test time
a = agent.select_action(s, deterministic=True)
s_next, r, dw, tr, info = env.step(a)
done = (dw or tr)
action_series.append(a[0])
total_scores += r
s = s_next
print(np.round(action_series, 3))
return int(total_scores/turns)

View File

@ -65,7 +65,7 @@ class DQN_agent(object):
if state[0][0] == 0:
a = np.random.randint(0,10)
else:
a = np.random.randint(10,13)
a = np.random.randint(10,14)
else:
if state[0][0] == 0:
q_value = self.q_net(state)

View File

@ -3,6 +3,7 @@ import os
import shutil
import argparse
import torch
import numpy as np
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from env_dis import PartitionMazeEnv
@ -111,19 +112,20 @@ def main():
print('EnvName:', BriefEnvName[opt.EnvIdex],
'seed:', opt.seed, 'score:', score)
else:
total_steps = 1
total_steps = 0
while total_steps < opt.Max_train_steps:
# Do not use opt.seed directly, or it can overfit to opt.seed
s = env.reset(seed=env_seed)
env_seed += 1
done = False
'''Interact & trian'''
while not done:
# e-greedy exploration
if total_steps < opt.random_steps:
# TODO sample取值有问题
a = env.action_space.sample()
if s[0] == 0:
a = np.random.randint(0, 10)
else:
a = np.random.randint(10, 14)
else:
a = agent.select_action(s, deterministic=False)
s_next, r, dw, tr, info = env.step(a)

13
env.py
View File

@ -39,8 +39,9 @@ class PartitionMazeEnv(gym.Env):
##############################
# 可能需要手动修改的超参数
##############################
self.CUT_NUM = 6 # 横切一半,竖切一半
self.BASE_LINE = 12000 # 基准时间通过greedy或者蒙特卡洛计算出来
self.CUT_NUM = 4 # 横切一半,竖切一半
self.BASE_LINE = 4000 # 基准时间通过greedy或者蒙特卡洛计算出来
self.MAX_STEPS = 200 # 迷宫走法步数上限
self.phase = 0 # 阶段控制0区域划分阶段1迷宫初始化阶段2走迷宫阶段
self.partition_step = 0 # 区域划分阶段步数,范围 0~4
@ -65,7 +66,6 @@ class PartitionMazeEnv(gym.Env):
self.init_maze_step = 0
# 路径规划阶段相关变量
self.MAX_STEPS = 50 # 迷宫走法步数上限
self.step_count = 0
self.rectangles = {}
self.car_pos = [(self.H / 2, self.W / 2) for _ in range(self.num_cars)]
@ -159,16 +159,11 @@ class PartitionMazeEnv(gym.Env):
else:
# 进入阶段 1初始化迷宫
self.phase = 1
state = np.concatenate(
[self.partition_values, np.array(self.car_pos).flatten()])
reward = 10
# 构建反向索引,方便后续计算
self.reverse_rectangles = {v['center']: k for k, v in self.rectangles.items()}
return state, reward, False, False, {}
elif self.phase == 1:
# 阶段 1初始化迷宫让多个车辆从区域中心出发前往最近的几个区域中心点
region_centers = [
(i, j, self.rectangles[(i, j)]['center'])
for i in range(len(self.row_cuts) - 1)
@ -191,7 +186,7 @@ class PartitionMazeEnv(gym.Env):
state = np.concatenate(
[self.partition_values, np.array(self.car_pos).flatten()]
)
return state, 0.0, False, False, {}
return state, reward, False, False, {}
elif self.phase == 2:
# 阶段 2路径规划走迷宫

View File

@ -47,9 +47,9 @@ class PartitionMazeEnv(gym.Env):
self.partition_values = np.zeros(
self.CUT_NUM, dtype=np.float32) # 存储 c₁, c₂, r₁, r₂
# 定义动作空间:长度为 14 的离散动作空间
# 前 10 个表示切分动作 {0, 0.1, ..., 0.9},后 4 个表示上下左右移
self.action_space = spaces.Discrete(14)
# 定义动作空间:长度为 15 的离散动作空间
# 前 10 个表示切分动作 {0, 0.1, ..., 0.9},后 5 个表示上下左右移动和保持不
self.action_space = spaces.Discrete(15)
# 定义观察空间为8维向量
# TODO 返回的状态目前只有位置坐标
@ -156,19 +156,14 @@ class PartitionMazeEnv(gym.Env):
[[self.phase], self.partition_values, np.zeros(np.array(self.car_pos).flatten().shape[0], dtype=np.float32)])
return state, reward, True, False, {}
else:
# 进入阶段 1初始化迷宫
# 初始化迷宫
self.phase = 1
state = np.concatenate(
[[self.phase], self.partition_values, np.array(self.car_pos).flatten()])
reward = 10
# 构建反向索引,方便后续计算
self.reverse_rectangles = {
v['center']: k for k, v in self.rectangles.items()}
return state, reward, False, False, {}
elif self.phase == 1:
# TODO 阶段一可以不写出来!!!
# 阶段 1初始化迷宫让多个车辆从区域中心出发前往最近的几个区域中心点
region_centers = [
(i, j, self.rectangles[(i, j)]['center'])
@ -193,7 +188,7 @@ class PartitionMazeEnv(gym.Env):
[[self.phase], self.partition_values,
np.array(self.car_pos).flatten()]
)
return state, 0.0, False, False, {}
return state, reward, False, False, {}
elif self.phase == 2:
# 阶段 2路径规划走迷宫
@ -212,6 +207,9 @@ class PartitionMazeEnv(gym.Env):
new_col = current_col - 1
elif action == 13 and current_col < len(self.col_cuts) - 2: # 右
new_col = new_col + 1
else:
# 无效动作,保持原地
pass
# 更新车辆位置
self.car_pos[current_car] = self.rectangles[(

View File

@ -1,12 +1,13 @@
# from env import PartitionMazeEnv
from env_dis import PartitionMazeEnv
from env import PartitionMazeEnv
# from env_dis import PartitionMazeEnv
env = PartitionMazeEnv()
state = env.reset()
print(state)
action_series = [0, 0, 3, 0, 0, 10]
action_series = [[0.1], [0.2], [0.4], [0], [0.1]]
# action_series = [0, 0, 3, 0, 0, 10]
for i in range(100):
action = action_series[i]

16
params2.yml Normal file
View File

@ -0,0 +1,16 @@
# Scenario parameters: region size, number of systems, and per-photo time/energy factors.
H : 50 # region height; spacing between grid points is 25 m (one unit distance)
W : 50 # region width
num_cars : 3 # number of systems (count of vehicle-nest-UAV systems)
# Time factors (unit: seconds; one photo per grid cell)
flight_time_factor : 3 # flight time per photo: the UAV flies at 9.5 m/s and takes a photo every 3 s
comp_time_factor : 5 # on-UAV computation time per photo: 5 s
trans_time_factor : 0.3 # transmission time per photo: 0.3 s
car_time_factor : 100 # TODO: vehicle travel time per unit distance is 2 s, scaled by an amplification factor of 50
bs_time_factor : 5 # computation time per photo on the nest
# Other parameters
flight_energy_factor : 0.05 # unit: minutes per photo
comp_energy_factor : 0.05 # TODO: the computation energy cost needs to be re-estimated
trans_energy_factor : 0.0025 # presumably the same unit as the factors above (minutes per photo) - TODO confirm
battery_energy_capacity : 20 # flight-only UAV endurance is 30 minutes (NOTE(review): value is 20 - confirm the intended capacity)