调整eval的输出

2025-03-19 10:58:43 +08:00 · 2025-03-19 10:58:43 +08:00 · c96c36d4cd
commit c96c36d4cd
parent 2362de4c54
8 changed files with 94 additions and 79 deletions
--- a/DDPG_solver/main.py
+++ b/DDPG_solver/main.py
@ -72,11 +72,11 @@ def main():
    # Seed Everything
    env_seed = opt.seed
-    torch.manual_seed(opt.seed)
+    # torch.manual_seed(opt.seed)
-    torch.cuda.manual_seed(opt.seed)
+    # torch.cuda.manual_seed(opt.seed)
-    torch.backends.cudnn.deterministic = True
+    # torch.backends.cudnn.deterministic = True
-    torch.backends.cudnn.benchmark = False
+    # torch.backends.cudnn.benchmark = False
-    print("Random Seed: {}".format(opt.seed))
+    # print("Random Seed: {}".format(opt.seed))
    # Build SummaryWriter to record training curves
    if opt.write:
--- a/DDPG_solver/utils.py
+++ b/DDPG_solver/utils.py
@ -2,6 +2,7 @@ import torch.nn.functional as F
 import torch.nn as nn
 import argparse
 import torch
 import numpy as np
 class Actor(nn.Module):
    def __init__(self, state_dim, action_dim, net_width, maxaction):
@ -40,14 +41,16 @@ def evaluate_policy(env, agent, turns = 3):
    for j in range(turns):
        s = env.reset()
        done = False
        action_series = []
        while not done:
            # Take deterministic actions at test time
            a = agent.select_action(s, deterministic=True)
            s_next, r, dw, tr, info = env.step(a)
            done = (dw or tr)
-
+            action_series.append(a[0])
            total_scores += r
            s = s_next
        print(np.round(action_series, 3))
    return int(total_scores/turns)
--- a/Duel_Double_DQN/DQN.py
+++ b/Duel_Double_DQN/DQN.py
@ -65,7 +65,7 @@ class DQN_agent(object):
 				if state[0][0] == 0:
 					a = np.random.randint(0,10)
 				else:
-					a = np.random.randint(10,13)
+					a = np.random.randint(10,14)
 			else:
 				if state[0][0] == 0:
 					q_value = self.q_net(state)
--- a/Duel_Double_DQN/main.py
+++ b/Duel_Double_DQN/main.py
@ -3,6 +3,7 @@ import os
 import shutil
 import argparse
 import torch
 import numpy as np
 import sys
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from env_dis import PartitionMazeEnv
@ -111,19 +112,20 @@ def main():
            print('EnvName:', BriefEnvName[opt.EnvIdex],
                  'seed:', opt.seed, 'score:', score)
    else:
-        total_steps = 1
+        total_steps = 0
        while total_steps < opt.Max_train_steps:
            # Do not use opt.seed directly, or it can overfit to opt.seed
            s = env.reset(seed=env_seed)
            env_seed += 1
            done = False
            '''Interact & trian'''
            while not done:
                # e-greedy exploration
                if total_steps < opt.random_steps:
-                    # TODO sample取值有问题
+                    if s[0] == 0:
-                    a = env.action_space.sample()
+                        a = np.random.randint(0, 10)
                    else:
                        a = np.random.randint(10, 14)
                else:
                    a = agent.select_action(s, deterministic=False)
                s_next, r, dw, tr, info = env.step(a)
--- a/env.py
+++ b/env.py
@ -39,8 +39,9 @@ class PartitionMazeEnv(gym.Env):
        ##############################
        # 可能需要手动修改的超参数
        ##############################
-        self.CUT_NUM = 6    # 横切一半，竖切一半
+        self.CUT_NUM = 4    # 横切一半，竖切一半
-        self.BASE_LINE = 12000     # 基准时间，通过greedy或者蒙特卡洛计算出来
+        self.BASE_LINE = 4000     # 基准时间，通过greedy或者蒙特卡洛计算出来
        self.MAX_STEPS = 200        # 迷宫走法步数上限
        self.phase = 0    # 阶段控制，0：区域划分阶段，1：迷宫初始化阶段，2：走迷宫阶段
        self.partition_step = 0      # 区域划分阶段步数，范围 0~4
@ -65,7 +66,6 @@ class PartitionMazeEnv(gym.Env):
        self.init_maze_step = 0
        # 路径规划阶段相关变量
        self.MAX_STEPS = 50         # 迷宫走法步数上限
        self.step_count = 0
        self.rectangles = {}
        self.car_pos = [(self.H / 2, self.W / 2) for _ in range(self.num_cars)]
@ -159,16 +159,11 @@ class PartitionMazeEnv(gym.Env):
                else:
                    # 进入阶段 1：初始化迷宫
                    self.phase = 1
                    state = np.concatenate(
                        [self.partition_values, np.array(self.car_pos).flatten()])
                    reward = 10
                    # 构建反向索引，方便后续计算
                    self.reverse_rectangles = {v['center']: k for k, v in self.rectangles.items()}
                    return state, reward, False, False, {}
        elif self.phase == 1:
            # 阶段 1：初始化迷宫，让多个车辆从区域中心出发，前往最近的几个区域中心点
                    region_centers = [
                        (i, j, self.rectangles[(i, j)]['center'])
                        for i in range(len(self.row_cuts) - 1)
@ -191,7 +186,7 @@ class PartitionMazeEnv(gym.Env):
                    state = np.concatenate(
                        [self.partition_values, np.array(self.car_pos).flatten()]
                    )
-            return state, 0.0, False, False, {}
+                    return state, reward, False, False, {}
        elif self.phase == 2:
            # 阶段 2：路径规划（走迷宫）
--- a/env_dis.py
+++ b/env_dis.py
@ -47,9 +47,9 @@ class PartitionMazeEnv(gym.Env):
        self.partition_values = np.zeros(
            self.CUT_NUM, dtype=np.float32)  # 存储 c₁, c₂, r₁, r₂
-        # 定义动作空间：长度为 14 的离散动作空间
+        # 定义动作空间：长度为 15 的离散动作空间
-        # 前 10 个表示切分动作 {0, 0.1, ..., 0.9}，后 4 个表示上下左右移动
+        # 前 10 个表示切分动作 {0, 0.1, ..., 0.9}，后 5 个表示上下左右移动和保持不动
-        self.action_space = spaces.Discrete(14)
+        self.action_space = spaces.Discrete(15)
        # 定义观察空间为8维向量
        # TODO 返回的状态目前只有位置坐标
@ -156,19 +156,14 @@ class PartitionMazeEnv(gym.Env):
                        [[self.phase], self.partition_values, np.zeros(np.array(self.car_pos).flatten().shape[0], dtype=np.float32)])
                    return state, reward, True, False, {}
                else:
-                    # 进入阶段 1：初始化迷宫
+                    # 初始化迷宫
                    self.phase = 1
                    state = np.concatenate(
                        [[self.phase], self.partition_values, np.array(self.car_pos).flatten()])
                    reward = 10
                    # 构建反向索引，方便后续计算
                    self.reverse_rectangles = {
                        v['center']: k for k, v in self.rectangles.items()}
                    return state, reward, False, False, {}
        elif self.phase == 1:
            # TODO 阶段一可以不写出来！！！
                    # 阶段 1：初始化迷宫，让多个车辆从区域中心出发，前往最近的几个区域中心点
                    region_centers = [
                        (i, j, self.rectangles[(i, j)]['center'])
@ -193,7 +188,7 @@ class PartitionMazeEnv(gym.Env):
                        [[self.phase], self.partition_values,
                            np.array(self.car_pos).flatten()]
                    )
-            return state, 0.0, False, False, {}
+                    return state, reward, False, False, {}
        elif self.phase == 2:
            # 阶段 2：路径规划（走迷宫）
@ -212,6 +207,9 @@ class PartitionMazeEnv(gym.Env):
                new_col = current_col - 1
            elif action == 13 and current_col < len(self.col_cuts) - 2:  # 右
                new_col = new_col + 1
            else:
                # 无效动作，保持原地
                pass
            # 更新车辆位置
            self.car_pos[current_car] = self.rectangles[(
--- a/human_action.py
+++ b/human_action.py
@ -1,12 +1,13 @@
-# from env import PartitionMazeEnv
+from env import PartitionMazeEnv
-from env_dis import PartitionMazeEnv
+# from env_dis import PartitionMazeEnv
 env = PartitionMazeEnv()
 state = env.reset()
 print(state)
-action_series = [0, 0, 3, 0, 0, 10]
+action_series = [[0.1], [0.2], [0.4], [0], [0.1]]
 # action_series = [0, 0, 3, 0, 0, 10]
 for i in range(100):
    action = action_series[i]
--- a/params2.yml
+++ b/params2.yml
@ -0,0 +1,16 @@
 H : 50         # 区域高度，网格点之间的距离为25m（单位距离）
 W : 50         # 区域宽度
 num_cars : 3           # 系统数量（车-巢-机系统个数）
 # 时间系数（单位：秒，每个网格一张照片）
 flight_time_factor : 3     # 每张照片对应的飞行时间，无人机飞行速度为9.5m/s，拍摄照片的时间间隔为3s
 comp_time_factor : 5    # 无人机上每张照片计算时间，5s
 trans_time_factor : 0.3    # 每张照片传输时间，0.3s
 car_time_factor : 100    # TODO 汽车每单位距离的移动时间：基础值 2s × 放大因子 50 = 100
 bs_time_factor : 5    # 机巢上每张照片计算时间
 # 其他参数
 flight_energy_factor : 0.05     # 单位：分钟/张
 comp_energy_factor : 0.05    # TODO 计算能耗需要重新估计
 trans_energy_factor : 0.0025
 battery_energy_capacity : 20  # 无人机只进行飞行时，续航为30分钟；TODO 此处取值为 20，与 30 分钟不一致，需确认单位（推测为分钟）及折算依据