diff --git a/env_partion_dist.py b/env_partion_dist.py index 69c732c..bb6bf56 100644 --- a/env_partion_dist.py +++ b/env_partion_dist.py @@ -37,6 +37,7 @@ class PartitionEnv(gym.Env): self.col_cuts = self.ORI_COL_CUTS[:] self.rectangles = [] self.adjust_step = 0 + self.best_path = None # 车队参数设置 with open(self.params + '.yml', 'r', encoding='utf-8') as file: @@ -64,6 +65,7 @@ class PartitionEnv(gym.Env): self.col_cuts = self.ORI_COL_CUTS[:] self.rectangles = [] self.adjust_step = 0 + self.best_path = None # 状态:前 4 维为 partition_values,其余为区域访问状态(初始全0) state = np.array(self.row_cuts + self.col_cuts) @@ -90,37 +92,34 @@ class PartitionEnv(gym.Env): elif action == 9: pass - self.adjust_step += 1 - state = np.array(self.row_cuts + self.col_cuts) - if self.row_cuts[0] < self.row_cuts[1] < self.row_cuts[2] < self.row_cuts[3] < self.row_cuts[4] and self.col_cuts[0] < self.col_cuts[1] < self.col_cuts[2]: - # 调整合法,验证分区情况是否满足条件 + # 调整是合法的,验证分区情况是否满足条件 rectangles = self.if_valid_partition() if not rectangles: - # 不满足条件,结束 - reward = -10000 - return state, reward, True, False, {} + # 不满足条件,时间给一个很大的值 + best_time = self.BASE_LINE * 2 else: # 满足条件,继续进行路径规划 - # 每隔10步计算一次路径,第一次也需要计算路径,记录最佳路径 - if self.adjust_step % 10 == 0 or self.adjust_step == 1: + if self.adjust_step % 10 == 0 or self.adjust_step == 1 or self.best_path is None: best_time, self.best_path = self.ga_solver(rectangles) else: # 根据最佳路径计算当前时间 best_time = self.get_best_time(self.best_path, rectangles) - reward = self.BASE_LINE - best_time - - if self.adjust_step < self.MAX_ADJUST_STEP: - done = False - else: - done = True - return state, reward, done, False, self.best_path else: - # 调整不合法,结束 - return state, -10, True, False, {} + # 调整不合法,时间给一个很大的值 + best_time = self.BASE_LINE * 2 + + reward = self.calc_reward(best_time) + self.adjust_step += 1 + state = np.array(self.row_cuts + self.col_cuts) + + if self.adjust_step < self.MAX_ADJUST_STEP: + return state, reward, False, False, {} + else: + return state, reward, True, 
def calc_reward(self, best_time):
    """Convert a path time into a bounded, non-negative reward.

    The reward is the product of two logistic (sigmoid) factors and a
    constant scale:

    * a time factor that grows as ``best_time`` drops below
      ``self.BASE_LINE`` (0.5 when they are equal);
    * a step factor that grows with ``self.adjust_step`` (0.5 at step 0),
      so the same time is worth more later in the episode.

    Args:
        best_time (float): Time of the current path. Callers may pass a
            deliberately large penalty value for invalid partitions.

    Returns:
        float: Reward between 0 and 10; higher is better.
    """
    time_scale = 20    # softness of the time-difference sigmoid
    step_scale = 10    # softness of the step-count sigmoid
    reward_scale = 10  # final scaling factor applied to the product

    time_diff = self.BASE_LINE - best_time

    # A very negative time_diff (e.g. a large penalty time) makes exp()
    # overflow to inf. That is harmless here -- 1 / (1 + inf) is exactly the
    # 0.0 limit we want -- so allow it locally instead of emitting a
    # RuntimeWarning (or raising when NumPy is configured with over='raise').
    with np.errstate(over="ignore"):
        # Normalise the time difference into (0, 1).
        normalized_diff = 1 / (1 + np.exp(-time_diff / time_scale))

        # Weight by how far the episode has progressed.
        step_weight = 1 / (1 + np.exp(-self.adjust_step / step_scale))

    return normalized_diff * step_weight * reward_scale