From 2362de4c54829a22225947ea167f7778ba6d1481 Mon Sep 17 00:00:00 2001
From: weixin_46229132 <weixin_46229132@noreply.gitcode.com>
Date: Wed, 19 Mar 2025 01:04:03 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9dqn?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Duel_Double_DQN/DQN.py  | 4 ++--
 Duel_Double_DQN/main.py | 3 ++-
 human_action.py         | 7 ++++---
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/Duel_Double_DQN/DQN.py b/Duel_Double_DQN/DQN.py
index 044f284..8791edf 100644
--- a/Duel_Double_DQN/DQN.py
+++ b/Duel_Double_DQN/DQN.py
@@ -69,11 +69,11 @@ class DQN_agent(object):
 			else:
 				if state[0][0] == 0:
 					q_value = self.q_net(state)
-					q_value[:10] = - float('inf')
+					q_value[10:] = - float('inf')
 					a = q_value.argmax().item()
 				else:
 					q_value = self.q_net(state)
-					q_value[10:] = - float('inf')
+					q_value[:10] = - float('inf')
 					a = q_value.argmax().item()
 		return a
 
diff --git a/Duel_Double_DQN/main.py b/Duel_Double_DQN/main.py
index ce89e13..6338b48 100644
--- a/Duel_Double_DQN/main.py
+++ b/Duel_Double_DQN/main.py
@@ -111,7 +111,7 @@ def main():
             print('EnvName:', BriefEnvName[opt.EnvIdex],
                   'seed:', opt.seed, 'score:', score)
     else:
-        total_steps = 0
+        total_steps = 1
         while total_steps < opt.Max_train_steps:
             # Do not use opt.seed directly, or it can overfit to opt.seed
             s = env.reset(seed=env_seed)
@@ -122,6 +122,7 @@ def main():
             while not done:
                 # e-greedy exploration
                 if total_steps < opt.random_steps:
+                    # TODO: the values drawn by sample() here are problematic
                     a = env.action_space.sample()
                 else:
                     a = agent.select_action(s, deterministic=False)
diff --git a/human_action.py b/human_action.py
index dcd70c2..add8f4c 100644
--- a/human_action.py
+++ b/human_action.py
@@ -1,15 +1,16 @@
-from env import PartitionMazeEnv
+# from env import PartitionMazeEnv
+from env_dis import PartitionMazeEnv
 
 env = PartitionMazeEnv()
 
 state = env.reset()
 print(state)
 
-action_series = [[0], [0.5], [0], [0.2], [0.4], [0.7], [0.3], [0.8], [0.5], [0.1], [0.7], [0.7], [0.9], [0.9], [0.1], [0.9], [0.9], [0.1]]
+action_series = [0, 0, 3, 0, 0, 10]
 
 for i in range(100):
     action = action_series[i]
     state, reward, done, info, _ = env.step(action)
     print(state, reward, done, info)
     if done:
-        break
\ No newline at end of file
+        break