修改dqn参数

2025-04-02 21:33:40 +08:00 · 2025-04-02 21:33:40 +08:00 · 0be9fa596a
commit 0be9fa596a
parent 981681c1bd
5 changed files with 260 additions and 8 deletions
--- a/.gitignore
+++ b/.gitignore
@ -9,7 +9,7 @@ __pycache__/
 # Pytorch weights
 model/
-logs/
+runs/
 # Distribution / packaging
 .Python
--- a/Duel_Double_DQN/main.py
+++ b/Duel_Double_DQN/main.py
@ -18,7 +18,7 @@ parser = argparse.ArgumentParser()
 parser.add_argument('--dvc', type=str, default='cpu',
                    help='running device: cuda or cpu')
 parser.add_argument('--EnvIdex', type=int, default=0, help='CP-v1, LLd-v2')
-parser.add_argument('--write', type=str2bool, default=False,
+parser.add_argument('--write', type=str2bool, default=True,
                    help='Use SummaryWriter to record the training')
 parser.add_argument('--render', type=str2bool,
                    default=False, help='Render or Not')
@ -31,10 +31,10 @@ parser.add_argument('--seed', type=int, default=0, help='random seed')
 parser.add_argument('--Max_train_steps', type=int,
                    default=int(1e8), help='Max training steps')
 parser.add_argument('--save_interval', type=int,
-                    default=int(5e3), help='Model saving interval, in steps.')
+                    default=int(5e4), help='Model saving interval, in steps.')
 parser.add_argument('--eval_interval', type=int, default=int(2e3),
                    help='Model evaluating interval, in steps.')
-parser.add_argument('--random_steps', type=int, default=int(3e3),
+parser.add_argument('--random_steps', type=int, default=int(6e3),
                    help='steps for random policy to explore')
 parser.add_argument('--update_every', type=int,
                    default=10, help='training frequency')
@ -47,7 +47,7 @@ parser.add_argument('--lr', type=float, default=1e-4, help='Learning rate')
 parser.add_argument('--batch_size', type=int, default=256,
                    help='lenth of sliced trajectory')
 parser.add_argument('--exp_noise', type=float,
-                    default=0.2, help='explore noise')
+                    default=0.3, help='explore noise')
 parser.add_argument('--noise_decay', type=float, default=0.99,
                    help='decay rate of explore noise')
 parser.add_argument('--Double', type=str2bool, default=False,
--- a/GA/GA_MTSP.py
+++ b/GA/GA_MTSP.py
@ -0,0 +1,250 @@
 import numpy as np
 import random
 import math
 import matplotlib.pyplot as plt
 import time
 class MTSP_GA:
    def __init__(self, cities, vehicle_num, population_size=200, max_iterations=1500):
        """
        初始化遗传算法求解器
        Args:
            cities: 城市坐标数组，第一个城市为起始点
            vehicle_num: 车辆数量
            population_size: 种群大小
            max_iterations: 最大迭代次数
        """
        self.cities = np.array(cities)
        self.city_count = len(cities)
        self.vehicle_num = vehicle_num
        self.origin = 0  # 起始点
        # GA参数
        self.population_size = population_size
        self.max_iterations = max_iterations
        self.retain_rate = 0.3      # 强者存活率
        self.random_rate = 0.5      # 弱者存活概率
        self.mutation_rate = 0.3    # 变异率
        # 计算距离矩阵
        self.distance_matrix = self._compute_distance_matrix()
        # 记录收敛过程
        self.distance_history = []
        self.best_path_history = []
    def _compute_distance_matrix(self):
        """计算城市间距离矩阵"""
        distance = np.zeros((self.city_count, self.city_count))
        for i in range(self.city_count):
            for j in range(self.city_count):
                distance[i][j] = math.sqrt(
                    (self.cities[i][0] - self.cities[j][0]) ** 2 +
                    (self.cities[i][1] - self.cities[j][1]) ** 2
                )
        return distance
    def _create_individual(self):
        """生成初始个体"""
        index = [i for i in range(self.city_count)]
        index.remove(self.origin)
        a = int(np.floor(len(index)/self.vehicle_num))
        X = []
        for i in range(self.vehicle_num):
            if i < self.vehicle_num-1:
                x = index[a*i:a*(i+1)]
            else:
                x = index[a*i:]
            X.append(x)
        return X
    def _get_total_distance(self, X):
        """计算路径总距离"""
        distance = 0
        distance_list = []
        for x in X:
            d = self.distance_matrix[self.origin][x[0]]  # 从起点到第一个城市
            d += self.distance_matrix[self.origin][x[-1]]  # 从最后一个城市返回起点
            for i in range(len(x)-1):
                d += self.distance_matrix[x[i]][x[i+1]]
            distance += d
            distance_list.append(d)
        return distance, distance_list
    def _selection(self, population):
        """选择操作"""
        graded = [[self._get_total_distance(x)[0], x] for x in population]
        graded = [x[1] for x in sorted(graded)]
        retain_length = int(len(graded) * self.retain_rate)
        parents = graded[:retain_length]
        for chromosome in graded[retain_length:]:
            if random.random() < self.random_rate:
                parents.append(chromosome)
        return parents
    def _crossover(self, parents):
        """交叉操作"""
        target_count = self.population_size - len(parents)
        children = []
        while len(children) < target_count:
            male_index = random.randint(0, len(parents) - 1)
            female_index = random.randint(0, len(parents) - 1)
            if male_index != female_index:
                male = parents[male_index]
                female = parents[female_index]
                gene1 = []
                gene2 = []
                for i in range(len(male)):
                    gene1 += male[i]
                    gene2 += female[i]
                left = random.randint(0, len(gene1)//2)
                right = random.randint(left + 1, len(gene1))
                cut = gene1[left:right]
                copy = gene2.copy()
                for j in cut:
                    copy.remove(j)
                child = copy + cut
                a = int(np.floor(len(child)/self.vehicle_num))
                child_c = []
                for i in range(self.vehicle_num):
                    if i < self.vehicle_num - 1:
                        x = child[a * i:a * (i + 1)]
                    else:
                        x = child[a * i:]
                    child_c.append(x)
                children.append(child_c)
        return children
    def _mutation(self, children):
        """变异操作"""
        for i in range(len(children)):
            if random.random() < self.mutation_rate:
                child = children[i]
                for j in range(int(np.floor(len(child)/2))):
                    a = 2*j
                    u = random.randint(1, len(child[a]) - 1)
                    w = random.randint(1, len(child[a+1]) - 1)
                    child_1 = child[a][:u].copy()
                    child_2 = child[a][u:].copy()
                    child_3 = child[a+1][:w].copy()
                    child_4 = child[a+1][w:].copy()
                    child_a = child_1+child_3
                    child_b = child_2+child_4
                    child[a] = child_a
                    child[a+1] = child_b
                children[i] = child.copy()
        return children
    def _get_best_solution(self, population):
        """获取最优解"""
        graded = [[self._get_total_distance(x)[0], x] for x in population]
        graded = sorted(graded, key=lambda x: x[0])
        return graded[0][0], graded[0][1]
    def solve(self):
        """
        求解MTSP，加入早停机制
        当连续50轮没有更好的解时停止迭代
        """
        # 初始化种群
        population = [self._create_individual() for _ in range(self.population_size)]
        # 初始化早停相关变量
        best_distance = float('inf')
        early_stop_counter = 0
        early_stop_threshold = 100
        # 迭代优化
        for i in range(self.max_iterations):
            parents = self._selection(population)
            children = self._crossover(parents)
            children = self._mutation(children)
            population = parents + children
            # 记录当前最优解
            current_distance, current_path = self._get_best_solution(population)
            self.distance_history.append(current_distance)
            self.best_path_history.append(current_path)
            # 早停判断
            if current_distance < best_distance:
                best_distance = current_distance
                best_path = current_path
                early_stop_counter = 0  # 重置计数器
            else:
                early_stop_counter += 1
            # 如果连续50轮没有更好的解，则停止迭代
            if early_stop_counter >= early_stop_threshold:
                print(f"Early stopping at iteration {i} due to no improvement in {early_stop_threshold} iterations")
                break
        # 返回最优解
        return best_distance, best_path
    def plot_convergence(self):
        """绘制收敛曲线"""
        plt.plot(range(len(self.distance_history)), self.distance_history)
        plt.xlabel('Iteration')
        plt.ylabel('Total Distance')
        plt.title('Convergence Curve')
        plt.show()
 def main():
    # 城市坐标
    cities = np.array([
        (456, 320),  # 起点（基地）
        (228, 0),
        (912, 0),
        (0, 80),
        (114, 80),
        (570, 160),
        (798, 160),
        (342, 240),
        (684, 240),
        (570, 400),
        (912, 400),
        (114, 480),
        (228, 480),
        (342, 560),
        (684, 560),
        (0, 640),
        (798, 640)
    ])
    # 设置随机种子
    np.random.seed(42)
    random.seed(42)
    # 创建求解器实例
    solver = MTSP_GA(
        cities=cities,
        vehicle_num=4,
        population_size=200,
        max_iterations=1500
    )
    # 求解
    start_time = time.time()
    best_distance, best_path = solver.solve()
    end_time = time.time()
    # 输出结果
    print(f"最优总距离: {best_distance:.2f}")
    print("最优路径方案:")
    for i, path in enumerate(best_path):
        print(f"车辆{i+1}的路径: {path}")
    print(f"求解时间: {end_time - start_time:.2f}秒")
    # 绘制收敛曲线
    solver.plot_convergence()
 if __name__ == "__main__":
    main()
--- a/human_action.py
+++ b/human_action.py
@ -9,10 +9,12 @@ state = env.reset()
 print('state:', state)
 # action_series = [[0.67], [0], [0], [0], [0.7]]
-action_series = [3, 3, 3, 5, 5, 1, 1, 1, 0, 0, 0]
+# action_series = [3, 3, 3, 5, 5, 1, 1, 1, 0, 0, 0]
 # action_series = [1] * 30
 # action_series = [[0.2], [0.4], [0.7], [0.5]]
 # action_series = [[-0.08], [-0.08], [0], [0]]
 action_series = [3, 5, 3, 5, 3, 5, 3, 5, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4,
                 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4]
 for i in range(100):
    action = action_series[i]
--- a/mtkl_sovler2.py
+++ b/mtkl_sovler2.py
@ -12,7 +12,7 @@ random.seed(42)
 # ---------------------------
 # 需要修改的超参数
 # ---------------------------
-num_iterations = 3000000000
+num_iterations = 300000000
 # 随机生成分区的行分段数与列分段数
 R = random.randint(0, 3)  # 行分段数
 C = random.randint(0, 3)  # 列分段数
@ -48,7 +48,7 @@ best_solution = None
 for iteration in tqdm(range(num_iterations), desc="蒙特卡洛模拟进度"):
    # 直接切值
    # horiz = [random.random() for _ in range(R)]
-    horiz = [random.randint(1, 999)/1000 for _ in range(R)]
+    horiz = [random.randint(1, 99)/100 for _ in range(R)]
    horiz = sorted(set(horiz))
    horiz = horiz if horiz else []
    row_boundaries = [0] + horiz + [1]