From 2362de4c54829a22225947ea167f7778ba6d1481 Mon Sep 17 00:00:00 2001 From: weixin_46229132 Date: Wed, 19 Mar 2025 01:04:03 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9dqn?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Duel_Double_DQN/DQN.py | 4 ++-- Duel_Double_DQN/main.py | 3 ++- human_action.py | 7 ++++--- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/Duel_Double_DQN/DQN.py b/Duel_Double_DQN/DQN.py index 044f284..8791edf 100644 --- a/Duel_Double_DQN/DQN.py +++ b/Duel_Double_DQN/DQN.py @@ -69,11 +69,11 @@ class DQN_agent(object): else: if state[0][0] == 0: q_value = self.q_net(state) - q_value[:10] = - float('inf') + q_value[10:] = - float('inf') a = q_value.argmax().item() else: q_value = self.q_net(state) - q_value[10:] = - float('inf') + q_value[:10] = - float('inf') a = q_value.argmax().item() return a diff --git a/Duel_Double_DQN/main.py b/Duel_Double_DQN/main.py index ce89e13..6338b48 100644 --- a/Duel_Double_DQN/main.py +++ b/Duel_Double_DQN/main.py @@ -111,7 +111,7 @@ def main(): print('EnvName:', BriefEnvName[opt.EnvIdex], 'seed:', opt.seed, 'score:', score) else: - total_steps = 0 + total_steps = 1 while total_steps < opt.Max_train_steps: # Do not use opt.seed directly, or it can overfit to opt.seed s = env.reset(seed=env_seed) @@ -122,6 +122,7 @@ def main(): while not done: # e-greedy exploration if total_steps < opt.random_steps: + # TODO sample取值有问题 a = env.action_space.sample() else: a = agent.select_action(s, deterministic=False) diff --git a/human_action.py b/human_action.py index dcd70c2..add8f4c 100644 --- a/human_action.py +++ b/human_action.py @@ -1,15 +1,16 @@ -from env import PartitionMazeEnv +# from env import PartitionMazeEnv +from env_dis import PartitionMazeEnv env = PartitionMazeEnv() state = env.reset() print(state) -action_series = [[0], [0.5], [0], [0.2], [0.4], [0.7], [0.3], [0.8], [0.5], [0.1], [0.7], [0.7], [0.9], [0.9], [0.1], [0.9], [0.9], [0.1]] +action_series = [0, 0, 3, 0, 0, 10] for i in range(100): action = action_series[i] state, reward, done, info, _ = env.step(action) print(state, reward, done, info) if done: - break \ No newline at end of file + break