diff --git a/env_partion_dist.py b/env_partion_dist.py
index 69c732c..bb6bf56 100644
--- a/env_partion_dist.py
+++ b/env_partion_dist.py
@@ -37,6 +37,7 @@ class PartitionEnv(gym.Env):
         self.col_cuts = self.ORI_COL_CUTS[:]
         self.rectangles = []
         self.adjust_step = 0
+        self.best_path = None
 
         # 车队参数设置
         with open(self.params + '.yml', 'r', encoding='utf-8') as file:
@@ -64,6 +65,7 @@ class PartitionEnv(gym.Env):
         self.col_cuts = self.ORI_COL_CUTS[:]
         self.rectangles = []
         self.adjust_step = 0
+        self.best_path = None
 
         # 状态：前 4 维为 partition_values，其余为区域访问状态（初始全0）
         state = np.array(self.row_cuts + self.col_cuts)
@@ -90,37 +92,34 @@ class PartitionEnv(gym.Env):
         elif action == 9:
             pass
 
-        self.adjust_step += 1
-        state = np.array(self.row_cuts + self.col_cuts)
-
         if self.row_cuts[0] < self.row_cuts[1] < self.row_cuts[2] < self.row_cuts[3] < self.row_cuts[4] and self.col_cuts[0] < self.col_cuts[1] < self.col_cuts[2]:
-            # 调整合法，验证分区情况是否满足条件
+            # 调整是合法的，验证分区情况是否满足条件
             rectangles = self.if_valid_partition()
 
             if not rectangles:
-                # 不满足条件，结束
-                reward = -10000
-                return state, reward, True, False, {}
+                # 不满足条件，时间给一个很大的值
+                best_time = self.BASE_LINE * 2
             else:
                 # 满足条件，继续进行路径规划
-
                 # 每隔10步计算一次路径，第一次也需要计算路径，记录最佳路径
-                if self.adjust_step % 10 == 0 or self.adjust_step == 1:
+                if self.adjust_step % 10 == 0 or self.adjust_step == 1 or self.best_path is None:
                     best_time, self.best_path = self.ga_solver(rectangles)
                 else:
                     # 根据最佳路径计算当前时间
                     best_time = self.get_best_time(self.best_path, rectangles)
 
-                reward = self.BASE_LINE - best_time
-
-                if self.adjust_step < self.MAX_ADJUST_STEP:
-                    done = False
-                else:
-                    done = True
-                return state, reward, done, False, self.best_path
         else:
-            # 调整不合法，结束
-            return state, -10, True, False, {}
+            # 调整不合法，时间给一个很大的值
+            best_time = self.BASE_LINE * 2
+
+        reward = self.calc_reward(best_time)
+        self.adjust_step += 1
+        state = np.array(self.row_cuts + self.col_cuts)
+
+        if self.adjust_step < self.MAX_ADJUST_STEP:
+            return state, reward, False, False, {}
+        else:
+            return state, reward, True, False, {}
 
     def if_valid_partition(self):
         rectangles = []
@@ -220,6 +219,27 @@ class PartitionEnv(gym.Env):
         best_time = ga.compute_pathlen(best_path)
         return best_time
 
+    def calc_reward(self, best_time):
+        """
+        Turn the time saved against BASE_LINE into a reward bounded by 0 and 10.
+        Args:
+            best_time (float): time of the current path; values below BASE_LINE raise the reward
+        Returns:
+            float: logistic(time_diff / 20) * logistic(adjust_step / 10) * 10
+        """
+        time_diff = self.BASE_LINE - best_time
+
+        # Normalize the time difference with a logistic curve (divisor 20 sets the slope)
+        normalized_diff = 1 / (1 + np.exp(-time_diff/20))
+
+        # Step weight: logistic in adjust_step, so it grows as more adjustment steps are taken
+        step_weight = 1 / (1 + np.exp(-self.adjust_step/10))
+
+        # Final reward (with a scaling factor applied)
+        reward = normalized_diff * step_weight * 10  # 10 is the scaling factor
+
+        return reward
+
     def render(self):
         if self.phase == 1:
             print("Phase 1: Initialize maze environment.")