修改dqn参数

2025-04-02 21:33:40 +08:00 · 2025-04-02 21:33:40 +08:00 · 0be9fa596a
commit 0be9fa596a
parent 981681c1bd
5 changed files with 260 additions and 8 deletions
--- a/.gitignore
+++ b/.gitignore
@ -9,7 +9,7 @@ __pycache__/

 # Pytorch weights
 model/
-logs/
+runs/

 # Distribution / packaging
 .Python
--- a/Duel_Double_DQN/main.py
+++ b/Duel_Double_DQN/main.py
@ -18,7 +18,7 @@ parser = argparse.ArgumentParser()
 parser.add_argument('--dvc', type=str, default='cpu',
                    help='running device: cuda or cpu')
 parser.add_argument('--EnvIdex', type=int, default=0, help='CP-v1, LLd-v2')
-parser.add_argument('--write', type=str2bool, default=False,
+parser.add_argument('--write', type=str2bool, default=True,
                    help='Use SummaryWriter to record the training')
 parser.add_argument('--render', type=str2bool,
                    default=False, help='Render or Not')
@ -31,10 +31,10 @@ parser.add_argument('--seed', type=int, default=0, help='random seed')
 parser.add_argument('--Max_train_steps', type=int,
                    default=int(1e8), help='Max training steps')
 parser.add_argument('--save_interval', type=int,
-                    default=int(5e3), help='Model saving interval, in steps.')
+                    default=int(5e4), help='Model saving interval, in steps.')
 parser.add_argument('--eval_interval', type=int, default=int(2e3),
                    help='Model evaluating interval, in steps.')
-parser.add_argument('--random_steps', type=int, default=int(3e3),
+parser.add_argument('--random_steps', type=int, default=int(6e3),
                    help='steps for random policy to explore')
 parser.add_argument('--update_every', type=int,
                    default=10, help='training frequency')
@ -47,7 +47,7 @@ parser.add_argument('--lr', type=float, default=1e-4, help='Learning rate')
 parser.add_argument('--batch_size', type=int, default=256,
                    help='lenth of sliced trajectory')
 parser.add_argument('--exp_noise', type=float,
-                    default=0.2, help='explore noise')
+                    default=0.3, help='explore noise')
 parser.add_argument('--noise_decay', type=float, default=0.99,
                    help='decay rate of explore noise')
 parser.add_argument('--Double', type=str2bool, default=False,
--- a/GA/GA_MTSP.py
+++ b/GA/GA_MTSP.py
@ -0,0 +1,250 @@
+import numpy as np
+import random
+import math
+import matplotlib.pyplot as plt
+import time
+
+
+class MTSP_GA:
+    """Genetic-algorithm solver for the multiple travelling salesman problem.
+
+    Every vehicle starts from and returns to the depot (the first city). A
+    solution is a list holding one route (a list of city indices) per vehicle.
+    """
+
+    def __init__(self, cities, vehicle_num, population_size=200, max_iterations=1500):
+        """Set up the solver.
+
+        Args:
+            cities: City coordinates; the first city is the depot.
+            vehicle_num: Number of vehicles (routes) in a solution.
+            population_size: Number of individuals per generation.
+            max_iterations: Upper bound on the number of generations.
+
+        Raises:
+            ValueError: If ``vehicle_num`` is below 1 or there is no city
+                besides the depot.
+        """
+        if vehicle_num < 1:
+            raise ValueError("vehicle_num must be at least 1")
+        if len(cities) < 2:
+            raise ValueError("cities must contain the depot and at least one more city")
+
+        self.cities = np.array(cities)
+        self.city_count = len(cities)
+        self.vehicle_num = vehicle_num
+        self.origin = 0  # index of the depot
+
+        # GA parameters
+        self.population_size = population_size
+        self.max_iterations = max_iterations
+        self.retain_rate = 0.3      # share of the best individuals that always survive
+        self.random_rate = 0.5      # survival probability of every other individual
+        self.mutation_rate = 0.3    # probability that a child is mutated
+
+        # Pairwise distance matrix
+        self.distance_matrix = self._compute_distance_matrix()
+
+        # Convergence record, one entry appended per generation
+        self.distance_history = []
+        self.best_path_history = []
+
+    def _compute_distance_matrix(self):
+        """Return the matrix of Euclidean distances between all city pairs."""
+        xs = self.cities[:, 0].astype(float)
+        ys = self.cities[:, 1].astype(float)
+        dx = xs[:, None] - xs[None, :]
+        dy = ys[:, None] - ys[None, :]
+        return np.sqrt(dx ** 2 + dy ** 2)
+
+    def _split_routes(self, sequence):
+        """Cut a flat city sequence into ``vehicle_num`` consecutive routes.
+
+        Every route gets ``len(sequence) // vehicle_num`` cities, except the
+        last one, which also takes the remainder.
+        """
+        size = len(sequence) // self.vehicle_num
+        last = self.vehicle_num - 1
+        routes = [sequence[size * k:size * (k + 1)] for k in range(last)]
+        routes.append(sequence[size * last:])
+        return routes
+
+    def _create_individual(self):
+        """Build the initial individual: all non-depot cities in index order."""
+        order = [city for city in range(self.city_count) if city != self.origin]
+        return self._split_routes(order)
+
+    def _get_total_distance(self, X):
+        """Return ``(total, per_route)`` lengths of the solution ``X``.
+
+        Each route is measured depot -> cities -> depot. An empty route
+        (a vehicle that never leaves the depot) contributes 0.
+        """
+        total = 0
+        per_route = []
+        for route in X:
+            if not route:
+                per_route.append(0.0)
+                continue
+            length = self.distance_matrix[self.origin][route[0]]    # depot -> first city
+            length += self.distance_matrix[self.origin][route[-1]]  # last city -> depot
+            for here, there in zip(route, route[1:]):
+                length += self.distance_matrix[here][there]
+            total += length
+            per_route.append(length)
+        return total, per_route
+
+    def _selection(self, population):
+        """Pick the parents of the next generation.
+
+        The best ``retain_rate`` share always survives; every other
+        individual survives with probability ``random_rate``.
+        """
+        graded = sorted([self._get_total_distance(x)[0], x] for x in population)
+        ranked = [entry[1] for entry in graded]
+        # Always keep two elite parents (when available) so that crossover
+        # can pair two different individuals.
+        retain_length = max(int(len(ranked) * self.retain_rate), min(2, len(ranked)))
+        parents = ranked[:retain_length]
+
+        for chromosome in ranked[retain_length:]:
+            if random.random() < self.random_rate:
+                parents.append(chromosome)
+        return parents
+
+    def _crossover(self, parents):
+        """Breed children until the population is back to ``population_size``.
+
+        A random slice of one parent's flattened tour is moved to the end of
+        the other parent's tour, and the result is split into routes again.
+
+        Raises:
+            ValueError: If children are needed but fewer than two parents
+                are available (two different parents could never be drawn).
+        """
+        target_count = self.population_size - len(parents)
+        if target_count > 0 and len(parents) < 2:
+            raise ValueError("crossover needs at least two parents")
+
+        children = []
+        while len(children) < target_count:
+            male_index = random.randint(0, len(parents) - 1)
+            female_index = random.randint(0, len(parents) - 1)
+            if male_index == female_index:
+                continue  # a parent is never crossed with itself
+
+            gene1 = [city for route in parents[male_index] for city in route]
+            gene2 = [city for route in parents[female_index] for city in route]
+
+            left = random.randint(0, len(gene1) // 2)
+            right = random.randint(left + 1, len(gene1))
+            cut = gene1[left:right]
+            moved = set(cut)
+            rest = [city for city in gene2 if city not in moved]
+
+            children.append(self._split_routes(rest + cut))
+        return children
+
+    def _mutation(self, children):
+        """Mutate children in place with probability ``mutation_rate`` each.
+
+        Routes are taken in consecutive pairs; both routes of a pair are cut
+        at a random point and their pieces are recombined. A pair with a
+        route of fewer than two cities is left untouched, because it has no
+        valid cut point.
+        """
+        for child in children:
+            if random.random() >= self.mutation_rate:
+                continue
+            for first in range(0, len(child) - 1, 2):
+                route_a, route_b = child[first], child[first + 1]
+                if len(route_a) < 2 or len(route_b) < 2:
+                    continue
+                u = random.randint(1, len(route_a) - 1)
+                w = random.randint(1, len(route_b) - 1)
+                child[first] = route_a[:u] + route_b[:w]
+                child[first + 1] = route_a[u:] + route_b[w:]
+        return children
+
+    def _get_best_solution(self, population):
+        """Return ``(distance, solution)`` of the shortest individual."""
+        best = min(
+            ([self._get_total_distance(x)[0], x] for x in population),
+            key=lambda entry: entry[0],
+        )
+        return best[0], best[1]
+
+    def solve(self):
+        """Run the GA and return the best solution found.
+
+        Stops early once ``early_stop_threshold`` (100) consecutive
+        generations fail to improve the best distance.
+
+        Returns:
+            Tuple ``(best_distance, best_path)``.
+        """
+        # Initial population
+        population = [self._create_individual() for _ in range(self.population_size)]
+
+        # Early-stopping state
+        best_distance = float('inf')
+        best_path = None
+        early_stop_counter = 0
+        early_stop_threshold = 100
+
+        for i in range(self.max_iterations):
+            parents = self._selection(population)
+            children = self._crossover(parents)
+            children = self._mutation(children)
+            population = parents + children
+
+            # Record the best individual of this generation
+            current_distance, current_path = self._get_best_solution(population)
+            self.distance_history.append(current_distance)
+            self.best_path_history.append(current_path)
+
+            if current_distance < best_distance:
+                best_distance = current_distance
+                best_path = current_path
+                early_stop_counter = 0  # improvement: restart the count
+            else:
+                early_stop_counter += 1
+
+            if early_stop_counter >= early_stop_threshold:
+                print(f"Early stopping at iteration {i} due to no improvement in {early_stop_threshold} iterations")
+                break
+
+        if best_path is None and population:
+            # No generation was run (max_iterations <= 0): report the best
+            # individual of the initial population instead of failing.
+            best_distance, best_path = self._get_best_solution(population)
+
+        return best_distance, best_path
+
+    def plot_convergence(self):
+        """Plot the best distance of every recorded generation."""
+        plt.plot(range(len(self.distance_history)), self.distance_history)
+        plt.xlabel('Iteration')
+        plt.ylabel('Total Distance')
+        plt.title('Convergence Curve')
+        plt.show()
+
+
+def main():
+    """Solve a 17-city, 4-vehicle demo instance and report the outcome."""
+    # City coordinates; the first entry is the depot (base)
+    coordinates = [
+        (456, 320),  # depot (base)
+        (228, 0),
+        (912, 0),
+        (0, 80),
+        (114, 80),
+        (570, 160),
+        (798, 160),
+        (342, 240),
+        (684, 240),
+        (570, 400),
+        (912, 400),
+        (114, 480),
+        (228, 480),
+        (342, 560),
+        (684, 560),
+        (0, 640),
+        (798, 640),
+    ]
+    cities = np.array(coordinates)
+
+    # Fix both random generators for reproducibility
+    np.random.seed(42)
+    random.seed(42)
+
+    # Build the solver
+    solver = MTSP_GA(cities=cities, vehicle_num=4,
+                     population_size=200, max_iterations=1500)
+
+    # Solve and time the run
+    started = time.time()
+    best_distance, best_path = solver.solve()
+    elapsed = time.time() - started
+
+    # Report the result
+    print(f"最优总距离: {best_distance:.2f}")
+    print("最优路径方案:")
+    for number, path in enumerate(best_path, start=1):
+        print(f"车辆{number}的路径: {path}")
+    print(f"求解时间: {elapsed:.2f}秒")
+
+    # Show the convergence curve
+    solver.plot_convergence()
+
+
+if __name__ == "__main__":
+    main()
--- a/human_action.py
+++ b/human_action.py
@ -9,10 +9,12 @@ state = env.reset()
 print('state:', state)

 # action_series = [[0.67], [0], [0], [0], [0.7]]
-action_series = [3, 3, 3, 5, 5, 1, 1, 1, 0, 0, 0]
+# action_series = [3, 3, 3, 5, 5, 1, 1, 1, 0, 0, 0]
 # action_series = [1] * 30
 # action_series = [[0.2], [0.4], [0.7], [0.5]]
 # action_series = [[-0.08], [-0.08], [0], [0]]
+action_series = [3, 5, 3, 5, 3, 5, 3, 5, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4,
+                 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4]

 for i in range(100):
    action = action_series[i]
--- a/mtkl_sovler2.py
+++ b/mtkl_sovler2.py
@ -12,7 +12,7 @@ random.seed(42)
 # ---------------------------
 # 需要修改的超参数
 # ---------------------------
-num_iterations = 3000000000
+num_iterations = 300000000
 # 随机生成分区的行分段数与列分段数
 R = random.randint(0, 3)  # 行分段数
 C = random.randint(0, 3)  # 列分段数
@ -48,7 +48,7 @@ best_solution = None
 for iteration in tqdm(range(num_iterations), desc="蒙特卡洛模拟进度"):
    # 直接切值
    # horiz = [random.random() for _ in range(R)]
-    horiz = [random.randint(1, 999)/1000 for _ in range(R)]
+    horiz = [random.randint(1, 99)/100 for _ in range(R)]
    horiz = sorted(set(horiz))
    horiz = horiz if horiz else []
    row_boundaries = [0] + horiz + [1]