ddpg求解env_part

2025-03-29 12:00:26 +08:00 · 2025-03-29 12:00:26 +08:00 · f05f8400fb
commit f05f8400fb
parent 0cf336c96d
2 changed files with 10 additions and 9 deletions
--- a/DDPG_solver/main.py
+++ b/DDPG_solver/main.py
@@ -9,7 +9,7 @@ import torch
 import sys
 import os
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from env import PartitionMazeEnv
+from env_partion import PartitionEnv
 # fmt: on
 '''Hyperparameter Setting'''
@@ -54,16 +54,16 @@ print(opt)
 def main():
-    EnvName = ['PartitionMaze_DDPG', 'Pendulum-v1', 'LunarLanderContinuous-v2', 'Humanoid-v4',
+    EnvName = ['Partition_DDPG', 'Pendulum-v1', 'LunarLanderContinuous-v2', 'Humanoid-v4',
               'HalfCheetah-v4', 'BipedalWalker-v3', 'BipedalWalkerHardcore-v3']
-    BrifEnvName = ['PM_DDPG', 'PV1', 'LLdV2',
+    BrifEnvName = ['Part_DDPG', 'PV1', 'LLdV2',
                   'Humanv4', 'HCv4', 'BWv3', 'BWHv3']
    # Build Env
    # env = gym.make(EnvName[opt.EnvIdex], render_mode = "human" if opt.render else None)
-    env = PartitionMazeEnv()
+    env = PartitionEnv()
    # eval_env = gym.make(EnvName[opt.EnvIdex])
-    eval_env = PartitionMazeEnv()
+    eval_env = PartitionEnv()
    opt.state_dim = env.observation_space.shape[0]
    opt.action_dim = env.action_space.shape[0]
    # remark: action space【-max,max】
--- a/env_partion.py
+++ b/env_partion.py
@@ -18,11 +18,11 @@ class PartitionEnv(gym.Env):
        ##############################
        # 可能需要手动修改的超参数
        ##############################
-        self.params = 'params3'
+        self.params = 'params2'
-        self.CUT_NUM = 2
+        self.CUT_NUM = 4
-        self.ROW_CUT_LIMIT = 1
+        self.ROW_CUT_LIMIT = 3
        self.COL_CUT_LIMIT = 1
-        self.BASE_LINE = 5000
+        self.BASE_LINE = 10000
        self.mTSP_STEPS = 10000
        # 车队参数设置
@@ -176,6 +176,7 @@ class PartitionEnv(gym.Env):
                # print(best_path)
                reward += self.BASE_LINE - best_time
                print(reward)
                return state, reward, True, False, best_path