DQN让它先看答案 (DQN: let the agent see the answer first)
This commit is contained in:
parent
f4fb963c06
commit
67c7a9d6c7
@@ -123,10 +123,12 @@ def main():
|
||||
while not done:
|
||||
# e-greedy exploration
|
||||
if total_steps < opt.random_steps:
|
||||
if s[0] == 0:
|
||||
a = np.random.randint(0, 10)
|
||||
else:
|
||||
a = np.random.randint(10, 14)
|
||||
# if s[0] == 0:
|
||||
# a = np.random.randint(0, 10)
|
||||
# else:
|
||||
# a = np.random.randint(10, 14)
|
||||
action_series = [0, 0, 3, 0, 10]
|
||||
a = action_series[total_steps % 5]
|
||||
else:
|
||||
a = agent.select_action(s, deterministic=False)
|
||||
s_next, r, dw, tr, info = env.step(a)
|
||||
|
@@ -275,7 +275,7 @@ class PartitionMazeEnv(gym.Env):
|
||||
# print(self.partition_values)
|
||||
# print(self.car_traj)
|
||||
reward += self.BASE_LINE / T * 1000
|
||||
print(reward)
|
||||
# print(reward)
|
||||
elif done and self.step_count >= self.MAX_STEPS:
|
||||
reward += -1000
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user