DQN让它先看答案
This commit is contained in:
parent
f4fb963c06
commit
67c7a9d6c7
@@ -123,10 +123,12 @@ def main():
|
|||||||
while not done:
|
while not done:
|
||||||
# e-greedy exploration
|
# e-greedy exploration
|
||||||
if total_steps < opt.random_steps:
|
if total_steps < opt.random_steps:
|
||||||
if s[0] == 0:
|
# if s[0] == 0:
|
||||||
a = np.random.randint(0, 10)
|
# a = np.random.randint(0, 10)
|
||||||
else:
|
# else:
|
||||||
a = np.random.randint(10, 14)
|
# a = np.random.randint(10, 14)
|
||||||
|
action_series = [0, 0, 3, 0, 10]
|
||||||
|
a = action_series[total_steps % 5]
|
||||||
else:
|
else:
|
||||||
a = agent.select_action(s, deterministic=False)
|
a = agent.select_action(s, deterministic=False)
|
||||||
s_next, r, dw, tr, info = env.step(a)
|
s_next, r, dw, tr, info = env.step(a)
|
||||||
|
@@ -275,7 +275,7 @@ class PartitionMazeEnv(gym.Env):
|
|||||||
# print(self.partition_values)
|
# print(self.partition_values)
|
||||||
# print(self.car_traj)
|
# print(self.car_traj)
|
||||||
reward += self.BASE_LINE / T * 1000
|
reward += self.BASE_LINE / T * 1000
|
||||||
print(reward)
|
# print(reward)
|
||||||
elif done and self.step_count >= self.MAX_STEPS:
|
elif done and self.step_count >= self.MAX_STEPS:
|
||||||
reward += -1000
|
reward += -1000
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user