DQN让它先看答案

This commit is contained in:
weixin_46229132 2025-03-20 14:05:15 +08:00
parent f4fb963c06
commit 67c7a9d6c7
2 changed files with 7 additions and 5 deletions

View File

@@ -123,10 +123,12 @@ def main():
     while not done:
         # e-greedy exploration
         if total_steps < opt.random_steps:
-            if s[0] == 0:
-                a = np.random.randint(0, 10)
-            else:
-                a = np.random.randint(10, 14)
+            # if s[0] == 0:
+            #     a = np.random.randint(0, 10)
+            # else:
+            #     a = np.random.randint(10, 14)
+            action_series = [0, 0, 3, 0, 10]
+            a = action_series[total_steps % 5]
         else:
             a = agent.select_action(s, deterministic=False)
         s_next, r, dw, tr, info = env.step(a)

View File

@@ -275,7 +275,7 @@ class PartitionMazeEnv(gym.Env):
             # print(self.partition_values)
             # print(self.car_traj)
             reward += self.BASE_LINE / T * 1000
-            print(reward)
+            # print(reward)
         elif done and self.step_count >= self.MAX_STEPS:
             reward += -1000