调整奖励
This commit is contained in:
parent
d364a1e4df
commit
ff23b5e745
@@ -107,8 +107,6 @@ def main():
|
||||
# kwargs["a_lr"] *= 2
|
||||
# kwargs["c_lr"] *= 4
|
||||
|
||||
if not os.path.exists('model'):
|
||||
os.mkdir('model')
|
||||
# transfer opt to dictionary, and use it to init PPO_agent
|
||||
agent = PPO_agent(**vars(opt))
|
||||
if opt.Loadmodel:
|
||||
@@ -154,7 +152,7 @@ def main():
|
||||
if total_steps % opt.eval_interval == 0:
|
||||
# evaluate the policy for 3 times, and get averaged result
|
||||
score = evaluate_policy(
|
||||
- eval_env, agent, opt.max_action, turns=3)
|
||||
+ eval_env, agent, opt.max_action, turns=1)
|
||||
if opt.write:
|
||||
writer.add_scalar(
|
||||
'ep_r', score, global_step=total_steps)
|
||||
|
7
env.py
7
env.py
@@ -40,7 +40,7 @@ class PartitionMazeEnv(gym.Env):
|
||||
# 可能需要手动修改的超参数
|
||||
##############################
|
||||
self.CUT_NUM = 4 # 横切一半,竖切一半
|
||||
- self.BASE_LINE = 4000 # 基准时间,通过greedy或者蒙特卡洛计算出来
|
||||
+ self.BASE_LINE = 3500 # 基准时间,通过greedy或者蒙特卡洛计算出来
|
||||
self.MAX_STEPS = 50 # 迷宫走法步数上限
|
||||
|
||||
self.phase = 0 # 阶段控制,0:区域划分阶段,1:迷宫初始化阶段,2:走迷宫阶段
|
||||
@@ -290,10 +290,9 @@ class PartitionMazeEnv(gym.Env):
|
||||
# 区域覆盖完毕,根据轨迹计算各车队的执行时间
|
||||
T = max([self._compute_motorcade_time(idx)
|
||||
for idx in range(self.num_cars)])
|
||||
# print(T)
|
||||
# print(self.partition_values)
|
||||
# print(self.car_traj)
|
||||
reward += self.BASE_LINE / T * 1000
|
||||
# reward += self.BASE_LINE - T
|
||||
# print(reward)
|
||||
elif done and self.step_count >= self.MAX_STEPS:
|
||||
reward += -1000
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user