调整奖励函数

This commit is contained in:
weixin_46229132 2025-03-31 11:12:01 +08:00
parent 84f69f4293
commit dab8f4fd8f

View File

@ -37,6 +37,7 @@ class PartitionEnv(gym.Env):
self.col_cuts = self.ORI_COL_CUTS[:] self.col_cuts = self.ORI_COL_CUTS[:]
self.rectangles = [] self.rectangles = []
self.adjust_step = 0 self.adjust_step = 0
self.best_path = None
# 车队参数设置 # 车队参数设置
with open(self.params + '.yml', 'r', encoding='utf-8') as file: with open(self.params + '.yml', 'r', encoding='utf-8') as file:
@ -64,6 +65,7 @@ class PartitionEnv(gym.Env):
self.col_cuts = self.ORI_COL_CUTS[:] self.col_cuts = self.ORI_COL_CUTS[:]
self.rectangles = [] self.rectangles = []
self.adjust_step = 0 self.adjust_step = 0
self.best_path = None
# 状态:前 4 维为 partition_values其余为区域访问状态初始全0 # 状态:前 4 维为 partition_values其余为区域访问状态初始全0
state = np.array(self.row_cuts + self.col_cuts) state = np.array(self.row_cuts + self.col_cuts)
@ -90,37 +92,34 @@ class PartitionEnv(gym.Env):
elif action == 9: elif action == 9:
pass pass
self.adjust_step += 1
state = np.array(self.row_cuts + self.col_cuts)
if self.row_cuts[0] < self.row_cuts[1] < self.row_cuts[2] < self.row_cuts[3] < self.row_cuts[4] and self.col_cuts[0] < self.col_cuts[1] < self.col_cuts[2]: if self.row_cuts[0] < self.row_cuts[1] < self.row_cuts[2] < self.row_cuts[3] < self.row_cuts[4] and self.col_cuts[0] < self.col_cuts[1] < self.col_cuts[2]:
# 调整合法,验证分区情况是否满足条件 # 调整是合法的,验证分区情况是否满足条件
rectangles = self.if_valid_partition() rectangles = self.if_valid_partition()
if not rectangles: if not rectangles:
# 不满足条件,结束 # 不满足条件,时间给一个很大的值
reward = -10000 best_time = self.BASE_LINE * 2
return state, reward, True, False, {}
else: else:
# 满足条件,继续进行路径规划 # 满足条件,继续进行路径规划
# 每隔10步计算一次路径第一次也需要计算路径记录最佳路径 # 每隔10步计算一次路径第一次也需要计算路径记录最佳路径
if self.adjust_step % 10 == 0 or self.adjust_step == 1: if self.adjust_step % 10 == 0 or self.adjust_step == 1 or self.best_path is None:
best_time, self.best_path = self.ga_solver(rectangles) best_time, self.best_path = self.ga_solver(rectangles)
else: else:
# 根据最佳路径计算当前时间 # 根据最佳路径计算当前时间
best_time = self.get_best_time(self.best_path, rectangles) best_time = self.get_best_time(self.best_path, rectangles)
reward = self.BASE_LINE - best_time else:
# 调整不合法,时间给一个很大的值
best_time = self.BASE_LINE * 2
reward = self.calc_reward(best_time)
self.adjust_step += 1
state = np.array(self.row_cuts + self.col_cuts)
if self.adjust_step < self.MAX_ADJUST_STEP: if self.adjust_step < self.MAX_ADJUST_STEP:
done = False return state, reward, False, False, {}
else: else:
done = True return state, reward, True, False, {}
return state, reward, done, False, self.best_path
else:
# 调整不合法,结束
return state, -10, True, False, {}
def if_valid_partition(self): def if_valid_partition(self):
rectangles = [] rectangles = []
@ -220,6 +219,27 @@ class PartitionEnv(gym.Env):
best_time = ga.compute_pathlen(best_path) best_time = ga.compute_pathlen(best_path)
return best_time return best_time
def calc_reward(self, best_time):
    """
    Compute the reward for the current step from the current path time.

    The reward is the product of two logistic terms and a scale factor:
    one term grows as ``best_time`` drops below ``self.BASE_LINE``, the
    other grows with ``self.adjust_step``. The result lies in [0, 10].

    Args:
        best_time (float): Time of the current path.

    Returns:
        float: The computed reward value.
    """
    def _sigmoid(x):
        # Numerically stable logistic function: exp() is only ever
        # evaluated on a non-positive argument, so it cannot overflow
        # when x is a large negative number (e.g. a heavily penalised
        # best_time far above the baseline).
        z = np.exp(-abs(x))
        return 1 / (1 + z) if x >= 0 else z / (1 + z)

    time_diff = self.BASE_LINE - best_time
    # Squash the time difference into (0, 1); 20 sets the sensitivity.
    normalized_diff = _sigmoid(time_diff / 20)
    # Weight by how many adjustment steps have been taken so far.
    step_weight = _sigmoid(self.adjust_step / 10)
    # Final reward; 10 is the scaling factor.
    reward = normalized_diff * step_weight * 10
    return reward
def render(self): def render(self):
if self.phase == 1: if self.phase == 1:
print("Phase 1: Initialize maze environment.") print("Phase 1: Initialize maze environment.")