From 0be9fa596a76de3ae3b591f1e252ae54c6f4f58c Mon Sep 17 00:00:00 2001
From: weixin_46229132
Date: Wed, 2 Apr 2025 21:33:40 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9dqn=E5=8F=82=E6=95=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (this section is ignored by git am):
- GA/GA_MTSP.py was revised during review, so the blob id on its index
  line no longer matches the file content carried by this patch.
- Blank context lines and context indentation were restored from the hunk
  line counts and should be checked against the target tree.

 .gitignore              |   2 +-
 Duel_Double_DQN/main.py |   8 +-
 GA/GA_MTSP.py           | 272 ++++++++++++++++++++++++++++++++++++++++
 human_action.py         |   4 +-
 mtkl_sovler2.py         |   4 +-
 5 files changed, 282 insertions(+), 8 deletions(-)
 create mode 100644 GA/GA_MTSP.py

diff --git a/.gitignore b/.gitignore
index 0f27cde..713d891 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,7 +9,7 @@ __pycache__/
 
 # Pytorch weights
 model/
-logs/
+runs/
 
 # Distribution / packaging
 .Python
diff --git a/Duel_Double_DQN/main.py b/Duel_Double_DQN/main.py
index e5b2417..d54aa54 100644
--- a/Duel_Double_DQN/main.py
+++ b/Duel_Double_DQN/main.py
@@ -18,7 +18,7 @@ parser = argparse.ArgumentParser()
 parser.add_argument('--dvc', type=str, default='cpu',
                     help='running device: cuda or cpu')
 parser.add_argument('--EnvIdex', type=int, default=0, help='CP-v1, LLd-v2')
-parser.add_argument('--write', type=str2bool, default=False,
+parser.add_argument('--write', type=str2bool, default=True,
                     help='Use SummaryWriter to record the training')
 parser.add_argument('--render', type=str2bool, default=False,
                     help='Render or Not')
@@ -31,10 +31,10 @@ parser.add_argument('--seed', type=int, default=0, help='random seed')
 parser.add_argument('--Max_train_steps', type=int,
                     default=int(1e8), help='Max training steps')
 parser.add_argument('--save_interval', type=int,
-                    default=int(5e3), help='Model saving interval, in steps.')
+                    default=int(5e4), help='Model saving interval, in steps.')
 parser.add_argument('--eval_interval', type=int,
                     default=int(2e3), help='Model evaluating interval, in steps.')
-parser.add_argument('--random_steps', type=int, default=int(3e3),
+parser.add_argument('--random_steps', type=int, default=int(6e3),
                     help='steps for random policy to explore')
 parser.add_argument('--update_every', type=int, default=10,
                     help='training frequency')
@@ -47,7 +47,7 @@ parser.add_argument('--lr', type=float, default=1e-4, help='Learning rate')
 parser.add_argument('--batch_size', type=int, default=256,
                     help='lenth of sliced trajectory')
 parser.add_argument('--exp_noise', type=float,
-                    default=0.2, help='explore noise')
+                    default=0.3, help='explore noise')
 parser.add_argument('--noise_decay', type=float,
                     default=0.99, help='decay rate of explore noise')
 parser.add_argument('--Double', type=str2bool, default=False,
diff --git a/GA/GA_MTSP.py b/GA/GA_MTSP.py
new file mode 100644
index 0000000..f57150f
--- /dev/null
+++ b/GA/GA_MTSP.py
@@ -0,0 +1,272 @@
+import numpy as np
+import random
+import matplotlib.pyplot as plt
+import time
+
+
+class MTSP_GA:
+    """Genetic-algorithm solver for the multiple travelling salesman problem.
+
+    A solution is a list of ``vehicle_num`` routes. Each route is a list of
+    city indices without the start city, which every route implicitly begins
+    and ends at.
+    """
+
+    def __init__(self, cities, vehicle_num, population_size=200, max_iterations=1500):
+        """
+        Initialise the solver.
+
+        Args:
+            cities: array of (x, y) city coordinates; the first city is the start
+            vehicle_num: number of vehicles
+            population_size: population size
+            max_iterations: maximum number of iterations
+
+        Raises:
+            ValueError: if cities is not a 2-D coordinate array or
+                vehicle_num is smaller than 1
+        """
+        self.cities = np.array(cities)
+        if self.cities.ndim != 2 or self.cities.shape[1] < 2:
+            raise ValueError("cities must be a 2-D array of (x, y) coordinates")
+        if vehicle_num < 1:
+            raise ValueError("vehicle_num must be at least 1")
+        self.city_count = len(cities)
+        self.vehicle_num = vehicle_num
+        self.origin = 0  # index of the start city
+
+        # GA parameters
+        self.population_size = population_size
+        self.max_iterations = max_iterations
+        self.retain_rate = 0.3  # share of the best individuals always kept
+        self.random_rate = 0.5  # survival probability of the other individuals
+        self.mutation_rate = 0.3  # probability that a child is mutated
+
+        # Distance matrix
+        self.distance_matrix = self._compute_distance_matrix()
+
+        # Convergence history
+        self.distance_history = []
+        self.best_path_history = []
+
+    def _compute_distance_matrix(self):
+        """Return the matrix of Euclidean distances between all cities."""
+        coords = self.cities[:, :2].astype(float)
+        # Broadcasting produces every pairwise coordinate difference at once,
+        # replacing the Python-level double loop.
+        delta = coords[:, np.newaxis, :] - coords[np.newaxis, :, :]
+        return np.sqrt((delta ** 2).sum(axis=-1))
+
+    def _split_routes(self, sequence):
+        """Cut a flat city sequence into vehicle_num consecutive routes.
+
+        Every route gets len(sequence) // vehicle_num cities; the last route
+        also takes the remainder.
+        """
+        size = len(sequence) // self.vehicle_num
+        routes = [sequence[size * i:size * (i + 1)]
+                  for i in range(self.vehicle_num - 1)]
+        routes.append(sequence[size * (self.vehicle_num - 1):])
+        return routes
+
+    def _create_individual(self):
+        """Build the initial individual: all cities in index order, split evenly."""
+        index = [i for i in range(self.city_count) if i != self.origin]
+        return self._split_routes(index)
+
+    def _get_total_distance(self, X):
+        """Return the total length of all routes and the list of route lengths."""
+        distance = 0
+        distance_list = []
+        for x in X:
+            if not x:
+                # A vehicle without cities never leaves the start city
+                distance_list.append(0.0)
+                continue
+            d = self.distance_matrix[self.origin][x[0]]  # start -> first city
+            d += self.distance_matrix[self.origin][x[-1]]  # last city -> start
+            for a, b in zip(x, x[1:]):
+                d += self.distance_matrix[a][b]
+            distance += d
+            distance_list.append(d)
+        return distance, distance_list
+
+    def _selection(self, population):
+        """Keep the best individuals plus a random share of the others."""
+        graded = [[self._get_total_distance(x)[0], x] for x in population]
+        graded = [x[1] for x in sorted(graded)]
+        retain_length = int(len(graded) * self.retain_rate)
+        parents = graded[:retain_length]
+
+        for chromosome in graded[retain_length:]:
+            if random.random() < self.random_rate:
+                parents.append(chromosome)
+        return parents
+
+    def _crossover(self, parents):
+        """Breed children until parents plus children reach population_size.
+
+        A random slice of one parent's flattened routes is moved to the end of
+        the other parent's flattened routes, then the result is split again.
+
+        Raises:
+            ValueError: if children are needed but fewer than two parents exist
+        """
+        target_count = self.population_size - len(parents)
+        if target_count > 0 and len(parents) < 2:
+            # Two distinct parents can never be drawn: the loop below would
+            # spin forever (one parent) or fail inside randint (no parent).
+            raise ValueError("crossover needs at least two parents")
+        children = []
+        while len(children) < target_count:
+            male_index = random.randint(0, len(parents) - 1)
+            female_index = random.randint(0, len(parents) - 1)
+            if male_index == female_index:
+                continue
+            gene1 = [city for route in parents[male_index] for city in route]
+            gene2 = [city for route in parents[female_index] for city in route]
+
+            left = random.randint(0, len(gene1) // 2)
+            right = random.randint(left + 1, len(gene1))
+            cut = gene1[left:right]
+            moved = set(cut)
+            rest = [city for city in gene2 if city not in moved]
+
+            children.append(self._split_routes(rest + cut))
+        return children
+
+    def _mutation(self, children):
+        """Mutate children in place by swapping segments between route pairs.
+
+        With probability mutation_rate, the routes (0, 1), (2, 3), ... of a
+        child are each cut at a random point; the two heads form one new route
+        and the two tails the other.
+        """
+        for child in children:
+            if random.random() >= self.mutation_rate:
+                continue
+            for j in range(len(child) // 2):
+                a = 2 * j
+                first, second = child[a], child[a + 1]
+                if len(first) < 2 or len(second) < 2:
+                    # randint(1, 0) is an empty range: a route this short
+                    # cannot be cut into two non-empty parts
+                    continue
+                u = random.randint(1, len(first) - 1)
+                w = random.randint(1, len(second) - 1)
+                child[a] = first[:u] + second[:w]
+                child[a + 1] = first[u:] + second[w:]
+        return children
+
+    def _get_best_solution(self, population):
+        """Return the distance and the routes of the shortest individual."""
+        graded = [[self._get_total_distance(x)[0], x] for x in population]
+        graded = sorted(graded, key=lambda x: x[0])
+        return graded[0][0], graded[0][1]
+
+    def solve(self, early_stop_threshold=100):
+        """
+        Solve the MTSP with early stopping.
+
+        Args:
+            early_stop_threshold: stop once this many consecutive iterations
+                brought no better solution
+
+        Returns:
+            (best_distance, best_path); best_path is None if no iteration ran
+        """
+        # Initial population
+        population = [self._create_individual() for _ in range(self.population_size)]
+
+        # Early-stopping state
+        best_distance = float('inf')
+        best_path = None
+        early_stop_counter = 0
+
+        # Iterative optimisation
+        for i in range(self.max_iterations):
+            parents = self._selection(population)
+            children = self._crossover(parents)
+            children = self._mutation(children)
+            population = parents + children
+
+            # Record the best solution of this iteration
+            current_distance, current_path = self._get_best_solution(population)
+            self.distance_history.append(current_distance)
+            self.best_path_history.append(current_path)
+
+            # Early-stopping bookkeeping
+            if current_distance < best_distance:
+                best_distance = current_distance
+                best_path = current_path
+                early_stop_counter = 0  # reset the counter
+            else:
+                early_stop_counter += 1
+
+            if early_stop_counter >= early_stop_threshold:
+                print(f"Early stopping at iteration {i} due to no improvement in {early_stop_threshold} iterations")
+                break
+
+        return best_distance, best_path
+
+    def plot_convergence(self):
+        """Plot the convergence curve."""
+        plt.plot(range(len(self.distance_history)), self.distance_history)
+        plt.xlabel('Iteration')
+        plt.ylabel('Total Distance')
+        plt.title('Convergence Curve')
+        plt.show()
+
+
+def main():
+    # City coordinates
+    cities = np.array([
+        (456, 320),  # start city (base)
+        (228, 0),
+        (912, 0),
+        (0, 80),
+        (114, 80),
+        (570, 160),
+        (798, 160),
+        (342, 240),
+        (684, 240),
+        (570, 400),
+        (912, 400),
+        (114, 480),
+        (228, 480),
+        (342, 560),
+        (684, 560),
+        (0, 640),
+        (798, 640)
+    ])
+
+    # Set the random seeds
+    np.random.seed(42)
+    random.seed(42)
+
+    # Create the solver instance
+    solver = MTSP_GA(
+        cities=cities,
+        vehicle_num=4,
+        population_size=200,
+        max_iterations=1500
+    )
+
+    # Solve
+    start_time = time.time()
+    best_distance, best_path = solver.solve()
+    end_time = time.time()
+
+    # Print the result
+    print(f"最优总距离: {best_distance:.2f}")
+    print("最优路径方案:")
+    for i, path in enumerate(best_path):
+        print(f"车辆{i+1}的路径: {path}")
+    print(f"求解时间: {end_time - start_time:.2f}秒")
+
+    # Plot the convergence curve
+    solver.plot_convergence()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/human_action.py b/human_action.py
index 12c67b0..1436db3 100644
--- a/human_action.py
+++ b/human_action.py
@@ -9,10 +9,12 @@ state = env.reset()
 print('state:', state)
 
 # action_series = [[0.67], [0], [0], [0], [0.7]]
-action_series = [3, 3, 3, 5, 5, 1, 1, 1, 0, 0, 0]
+# action_series = [3, 3, 3, 5, 5, 1, 1, 1, 0, 0, 0]
 # action_series = [1] * 30
 # action_series = [[0.2], [0.4], [0.7], [0.5]]
 # action_series = [[-0.08], [-0.08], [0], [0]]
+action_series = [3, 5, 3, 5, 3, 5, 3, 5, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4,
+                 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4]
 
 for i in range(100):
     action = action_series[i]
diff --git a/mtkl_sovler2.py b/mtkl_sovler2.py
index c25a00a..ae53b2e 100644
--- a/mtkl_sovler2.py
+++ b/mtkl_sovler2.py
@@ -12,7 +12,7 @@ random.seed(42)
 # ---------------------------
 # 需要修改的超参数
 # ---------------------------
-num_iterations = 3000000000
+num_iterations = 300000000
 # 随机生成分区的行分段数与列分段数
 R = random.randint(0, 3)  # 行分段数
 C = random.randint(0, 3)  # 列分段数
@@ -48,7 +48,7 @@ best_solution = None
 for iteration in tqdm(range(num_iterations), desc="蒙特卡洛模拟进度"):
     # 直接切值
     # horiz = [random.random() for _ in range(R)]
-    horiz = [random.randint(1, 999)/1000 for _ in range(R)]
+    horiz = [random.randint(1, 99)/100 for _ in range(R)]
     horiz = sorted(set(horiz))
     horiz = horiz if horiz else []
     row_boundaries = [0] + horiz + [1]