调整奖励函数
This commit is contained in:
parent
84f69f4293
commit
dab8f4fd8f
@ -37,6 +37,7 @@ class PartitionEnv(gym.Env):
|
|||||||
self.col_cuts = self.ORI_COL_CUTS[:]
|
self.col_cuts = self.ORI_COL_CUTS[:]
|
||||||
self.rectangles = []
|
self.rectangles = []
|
||||||
self.adjust_step = 0
|
self.adjust_step = 0
|
||||||
|
self.best_path = None
|
||||||
|
|
||||||
# 车队参数设置
|
# 车队参数设置
|
||||||
with open(self.params + '.yml', 'r', encoding='utf-8') as file:
|
with open(self.params + '.yml', 'r', encoding='utf-8') as file:
|
||||||
@ -64,6 +65,7 @@ class PartitionEnv(gym.Env):
|
|||||||
self.col_cuts = self.ORI_COL_CUTS[:]
|
self.col_cuts = self.ORI_COL_CUTS[:]
|
||||||
self.rectangles = []
|
self.rectangles = []
|
||||||
self.adjust_step = 0
|
self.adjust_step = 0
|
||||||
|
self.best_path = None
|
||||||
|
|
||||||
# 状态:前 4 维为 partition_values,其余为区域访问状态(初始全0)
|
# 状态:前 4 维为 partition_values,其余为区域访问状态(初始全0)
|
||||||
state = np.array(self.row_cuts + self.col_cuts)
|
state = np.array(self.row_cuts + self.col_cuts)
|
||||||
@ -90,37 +92,34 @@ class PartitionEnv(gym.Env):
|
|||||||
elif action == 9:
|
elif action == 9:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
self.adjust_step += 1
|
|
||||||
state = np.array(self.row_cuts + self.col_cuts)
|
|
||||||
|
|
||||||
if self.row_cuts[0] < self.row_cuts[1] < self.row_cuts[2] < self.row_cuts[3] < self.row_cuts[4] and self.col_cuts[0] < self.col_cuts[1] < self.col_cuts[2]:
|
if self.row_cuts[0] < self.row_cuts[1] < self.row_cuts[2] < self.row_cuts[3] < self.row_cuts[4] and self.col_cuts[0] < self.col_cuts[1] < self.col_cuts[2]:
|
||||||
# 调整合法,验证分区情况是否满足条件
|
# 调整是合法的,验证分区情况是否满足条件
|
||||||
rectangles = self.if_valid_partition()
|
rectangles = self.if_valid_partition()
|
||||||
|
|
||||||
if not rectangles:
|
if not rectangles:
|
||||||
# 不满足条件,结束
|
# 不满足条件,时间给一个很大的值
|
||||||
reward = -10000
|
best_time = self.BASE_LINE * 2
|
||||||
return state, reward, True, False, {}
|
|
||||||
else:
|
else:
|
||||||
# 满足条件,继续进行路径规划
|
# 满足条件,继续进行路径规划
|
||||||
|
|
||||||
# 每隔10步计算一次路径,第一次也需要计算路径,记录最佳路径
|
# 每隔10步计算一次路径,第一次也需要计算路径,记录最佳路径
|
||||||
if self.adjust_step % 10 == 0 or self.adjust_step == 1:
|
if self.adjust_step % 10 == 0 or self.adjust_step == 1 or self.best_path is None:
|
||||||
best_time, self.best_path = self.ga_solver(rectangles)
|
best_time, self.best_path = self.ga_solver(rectangles)
|
||||||
else:
|
else:
|
||||||
# 根据最佳路径计算当前时间
|
# 根据最佳路径计算当前时间
|
||||||
best_time = self.get_best_time(self.best_path, rectangles)
|
best_time = self.get_best_time(self.best_path, rectangles)
|
||||||
|
|
||||||
reward = self.BASE_LINE - best_time
|
else:
|
||||||
|
# 调整不合法,时间给一个很大的值
|
||||||
|
best_time = self.BASE_LINE * 2
|
||||||
|
|
||||||
|
reward = self.calc_reward(best_time)
|
||||||
|
self.adjust_step += 1
|
||||||
|
state = np.array(self.row_cuts + self.col_cuts)
|
||||||
|
|
||||||
if self.adjust_step < self.MAX_ADJUST_STEP:
|
if self.adjust_step < self.MAX_ADJUST_STEP:
|
||||||
done = False
|
return state, reward, False, False, {}
|
||||||
else:
|
else:
|
||||||
done = True
|
return state, reward, True, False, {}
|
||||||
return state, reward, done, False, self.best_path
|
|
||||||
else:
|
|
||||||
# 调整不合法,结束
|
|
||||||
return state, -10, True, False, {}
|
|
||||||
|
|
||||||
def if_valid_partition(self):
|
def if_valid_partition(self):
|
||||||
rectangles = []
|
rectangles = []
|
||||||
@ -220,6 +219,27 @@ class PartitionEnv(gym.Env):
|
|||||||
best_time = ga.compute_pathlen(best_path)
|
best_time = ga.compute_pathlen(best_path)
|
||||||
return best_time
|
return best_time
|
||||||
|
|
||||||
|
def calc_reward(self, best_time):
    """
    Compute the shaped reward for the current step.

    The reward is the product of two logistic terms scaled by 10: one
    driven by how far ``best_time`` is below ``self.BASE_LINE`` and one
    driven by ``self.adjust_step``.

    Args:
        best_time (float): Time of the current path.

    Returns:
        float: The reward, in the range [0, 10].
    """

    def _sigmoid(x):
        # Stable logistic: 1 / (1 + exp(-x)) == exp(-log(1 + exp(-x))).
        # np.logaddexp never evaluates exp() of a large positive number,
        # so a best_time far above BASE_LINE no longer overflows np.exp
        # (which emits a RuntimeWarning, or raises under
        # np.seterr(over='raise')).
        return np.exp(-np.logaddexp(0.0, -x))

    time_diff = self.BASE_LINE - best_time

    # Squash the time difference into (0, 1); 20 is the softness constant.
    normalized_diff = _sigmoid(time_diff / 20)

    # Step weight grows with the number of adjustment steps taken so far.
    step_weight = _sigmoid(self.adjust_step / 10)

    # Final reward; 10 is the scaling factor.
    reward = normalized_diff * step_weight * 10

    return reward
|
||||||
|
|
||||||
def render(self):
|
def render(self):
|
||||||
if self.phase == 1:
|
if self.phase == 1:
|
||||||
print("Phase 1: Initialize maze environment.")
|
print("Phase 1: Initialize maze environment.")
|
||||||
|
Loading…
Reference in New Issue
Block a user