修改dqn参数

This commit is contained in:
weixin_46229132 2025-04-02 21:33:40 +08:00
parent 981681c1bd
commit 0be9fa596a
5 changed files with 260 additions and 8 deletions

2
.gitignore vendored
View File

@ -9,7 +9,7 @@ __pycache__/
# Pytorch weights # Pytorch weights
model/ model/
logs/ runs/
# Distribution / packaging # Distribution / packaging
.Python .Python

View File

@ -18,7 +18,7 @@ parser = argparse.ArgumentParser()
parser.add_argument('--dvc', type=str, default='cpu', parser.add_argument('--dvc', type=str, default='cpu',
help='running device: cuda or cpu') help='running device: cuda or cpu')
parser.add_argument('--EnvIdex', type=int, default=0, help='CP-v1, LLd-v2') parser.add_argument('--EnvIdex', type=int, default=0, help='CP-v1, LLd-v2')
parser.add_argument('--write', type=str2bool, default=False, parser.add_argument('--write', type=str2bool, default=True,
help='Use SummaryWriter to record the training') help='Use SummaryWriter to record the training')
parser.add_argument('--render', type=str2bool, parser.add_argument('--render', type=str2bool,
default=False, help='Render or Not') default=False, help='Render or Not')
@ -31,10 +31,10 @@ parser.add_argument('--seed', type=int, default=0, help='random seed')
parser.add_argument('--Max_train_steps', type=int, parser.add_argument('--Max_train_steps', type=int,
default=int(1e8), help='Max training steps') default=int(1e8), help='Max training steps')
parser.add_argument('--save_interval', type=int, parser.add_argument('--save_interval', type=int,
default=int(5e3), help='Model saving interval, in steps.') default=int(5e4), help='Model saving interval, in steps.')
parser.add_argument('--eval_interval', type=int, default=int(2e3), parser.add_argument('--eval_interval', type=int, default=int(2e3),
help='Model evaluating interval, in steps.') help='Model evaluating interval, in steps.')
parser.add_argument('--random_steps', type=int, default=int(3e3), parser.add_argument('--random_steps', type=int, default=int(6e3),
help='steps for random policy to explore') help='steps for random policy to explore')
parser.add_argument('--update_every', type=int, parser.add_argument('--update_every', type=int,
default=10, help='training frequency') default=10, help='training frequency')
@ -47,7 +47,7 @@ parser.add_argument('--lr', type=float, default=1e-4, help='Learning rate')
parser.add_argument('--batch_size', type=int, default=256, parser.add_argument('--batch_size', type=int, default=256,
help='lenth of sliced trajectory') help='lenth of sliced trajectory')
parser.add_argument('--exp_noise', type=float, parser.add_argument('--exp_noise', type=float,
default=0.2, help='explore noise') default=0.3, help='explore noise')
parser.add_argument('--noise_decay', type=float, default=0.99, parser.add_argument('--noise_decay', type=float, default=0.99,
help='decay rate of explore noise') help='decay rate of explore noise')
parser.add_argument('--Double', type=str2bool, default=False, parser.add_argument('--Double', type=str2bool, default=False,

250
GA/GA_MTSP.py Normal file
View File

@ -0,0 +1,250 @@
import numpy as np
import random
import math
import matplotlib.pyplot as plt
import time
class MTSP_GA:
    """Genetic-algorithm solver for the multiple travelling salesman problem.

    Every vehicle starts from and returns to a shared starting city
    (``self.origin``). An individual is a list of ``vehicle_num`` routes,
    each route being a list of city indices that excludes the origin.
    """

    def __init__(self, cities, vehicle_num, population_size=200, max_iterations=1500):
        """Initialise the genetic-algorithm solver.

        Args:
            cities: City coordinates; the first city is the starting point.
            vehicle_num: Number of vehicles (routes per individual).
            population_size: Number of individuals per generation.
            max_iterations: Maximum number of generations.
        """
        self.cities = np.array(cities)
        self.city_count = len(cities)
        self.vehicle_num = vehicle_num
        self.origin = 0  # index of the starting point

        # GA parameters
        self.population_size = population_size
        self.max_iterations = max_iterations
        self.retain_rate = 0.3  # fraction of the best individuals always kept
        self.random_rate = 0.5  # survival probability of the remaining ones
        self.mutation_rate = 0.3  # probability that a child is mutated

        # Pairwise distance matrix
        self.distance_matrix = self._compute_distance_matrix()

        # Convergence record (one entry per generation)
        self.distance_history = []
        self.best_path_history = []

    def _compute_distance_matrix(self):
        """Return the pairwise Euclidean distance matrix between cities.

        Only the first two coordinates of each city are used.
        """
        if self.city_count == 0:
            return np.zeros((0, 0))
        xs = self.cities[:, 0].astype(float)
        ys = self.cities[:, 1].astype(float)
        # Broadcasting builds all pairwise differences without Python loops.
        dx = xs[:, None] - xs[None, :]
        dy = ys[:, None] - ys[None, :]
        return np.sqrt(dx ** 2 + dy ** 2)

    def _split_routes(self, sequence):
        """Cut a flat city sequence into ``vehicle_num`` consecutive routes.

        Each route receives ``len(sequence) // vehicle_num`` cities; the last
        route additionally takes whatever remains.
        """
        size = len(sequence) // self.vehicle_num
        last = self.vehicle_num - 1
        routes = [sequence[size * k:size * (k + 1)] for k in range(last)]
        routes.append(sequence[size * last:])
        return routes

    def _create_individual(self):
        """Create one random individual.

        The non-origin cities are shuffled before being split into routes so
        that the initial population is diverse rather than a set of clones.
        """
        index = [i for i in range(self.city_count) if i != self.origin]
        random.shuffle(index)
        return self._split_routes(index)

    def _get_total_distance(self, X):
        """Compute the length of every route and their sum.

        Args:
            X: An individual (list of routes).

        Returns:
            A ``(total, per_route)`` tuple. An empty route (vehicle that
            never leaves the origin) contributes a length of 0.
        """
        distance = 0
        distance_list = []
        for x in X:
            if not x:
                distance_list.append(0)
                continue
            d = self.distance_matrix[self.origin][x[0]]  # origin -> first city
            d += self.distance_matrix[self.origin][x[-1]]  # last city -> origin
            for a, b in zip(x, x[1:]):
                d += self.distance_matrix[a][b]
            distance += d
            distance_list.append(d)
        return distance, distance_list

    def _selection(self, population):
        """Select the parents of the next generation.

        The best ``retain_rate`` fraction always survives; every other
        individual survives with probability ``random_rate``.
        """
        # Sort by distance only, so ties never fall back to comparing paths.
        graded = sorted(population, key=lambda x: self._get_total_distance(x)[0])
        retain_length = int(len(graded) * self.retain_rate)
        parents = graded[:retain_length]
        for chromosome in graded[retain_length:]:
            if random.random() < self.random_rate:
                parents.append(chromosome)
        return parents

    def _crossover(self, parents):
        """Breed children until the population is back to full size.

        A random segment of the first parent's flattened tour is moved to the
        end of the second parent's tour (with duplicates removed), and the
        result is split into routes again.

        Raises:
            ValueError: If children are needed but fewer than two parents
                are available (the original code looped forever here).
        """
        target_count = self.population_size - len(parents)
        if target_count <= 0:
            return []
        if len(parents) < 2:
            raise ValueError(
                "crossover needs at least two parents; increase population_size"
            )
        children = []
        while len(children) < target_count:
            # Two distinct parents, drawn uniformly.
            male, female = random.sample(parents, 2)
            gene1 = [city for route in male for city in route]
            gene2 = [city for route in female for city in route]
            left = random.randint(0, len(gene1) // 2)
            right = random.randint(left + 1, len(gene1))
            cut = gene1[left:right]
            taken = set(cut)
            remainder = [city for city in gene2 if city not in taken]
            children.append(self._split_routes(remainder + cut))
        return children

    def _mutation(self, children):
        """Mutate children in place by recombining adjacent route pairs.

        For each pair of routes ``(2j, 2j + 1)`` both are cut at a random
        point; the two heads form one new route and the two tails the other.
        Pairs containing a route with fewer than two cities are left
        untouched because no valid cut point exists.
        """
        for i, child in enumerate(children):
            if random.random() >= self.mutation_rate:
                continue
            for a in range(0, len(child) - 1, 2):
                first, second = child[a], child[a + 1]
                if len(first) < 2 or len(second) < 2:
                    continue
                u = random.randint(1, len(first) - 1)
                w = random.randint(1, len(second) - 1)
                child[a] = first[:u] + second[:w]
                child[a + 1] = first[u:] + second[w:]
            children[i] = child.copy()
        return children

    def _get_best_solution(self, population):
        """Return ``(distance, individual)`` of the shortest individual."""
        scored = ((self._get_total_distance(x)[0], x) for x in population)
        best_distance, best_individual = min(scored, key=lambda pair: pair[0])
        return best_distance, best_individual

    def solve(self, early_stop_threshold=100):
        """Run the genetic algorithm with early stopping.

        Args:
            early_stop_threshold: Stop once this many consecutive
                generations have passed without a better solution.

        Returns:
            A ``(best_distance, best_path)`` tuple. If no generation is run
            (``max_iterations == 0``) the result is ``(inf, None)``.
        """
        # Initial population
        population = [self._create_individual() for _ in range(self.population_size)]

        # Early-stopping state
        best_distance = float('inf')
        best_path = None
        early_stop_counter = 0

        for i in range(self.max_iterations):
            parents = self._selection(population)
            children = self._crossover(parents)
            children = self._mutation(children)
            population = parents + children

            # Record the best individual of this generation
            current_distance, current_path = self._get_best_solution(population)
            self.distance_history.append(current_distance)
            self.best_path_history.append(current_path)

            if current_distance < best_distance:
                best_distance = current_distance
                best_path = current_path
                early_stop_counter = 0  # improvement: reset the counter
            else:
                early_stop_counter += 1

            if early_stop_counter >= early_stop_threshold:
                print(f"Early stopping at iteration {i} due to no improvement in {early_stop_threshold} iterations")
                break

        return best_distance, best_path

    def plot_convergence(self):
        """Plot the best total distance of every recorded generation."""
        plt.plot(range(len(self.distance_history)), self.distance_history)
        plt.xlabel('Iteration')
        plt.ylabel('Total Distance')
        plt.title('Convergence Curve')
        plt.show()
def main():
    """Solve a fixed 17-city, 4-vehicle demo instance and report the result.

    Prints the best total distance, the route of every vehicle and the
    elapsed solving time, then shows the convergence curve.
    """
    # Seed both random generators so runs are reproducible.
    random.seed(42)
    np.random.seed(42)

    # City coordinates; the first entry is the starting point (base).
    coordinates = [
        (456, 320),
        (228, 0), (912, 0),
        (0, 80), (114, 80),
        (570, 160), (798, 160),
        (342, 240), (684, 240),
        (570, 400), (912, 400),
        (114, 480), (228, 480),
        (342, 560), (684, 560),
        (0, 640), (798, 640),
    ]
    cities = np.array(coordinates)

    # Build the solver.
    solver = MTSP_GA(cities, 4, population_size=200, max_iterations=1500)

    # Run it and time the call.
    start_time = time.time()
    result = solver.solve()
    end_time = time.time()
    best_distance, best_path = result

    # Report the outcome.
    print(f"最优总距离: {best_distance:.2f}")
    print("最优路径方案:")
    for i, path in enumerate(best_path):
        print(f"车辆{i+1}的路径: {path}")
    print(f"求解时间: {end_time - start_time:.2f}")

    # Draw the convergence curve.
    solver.plot_convergence()


if __name__ == "__main__":
    main()

View File

@ -9,10 +9,12 @@ state = env.reset()
print('state:', state) print('state:', state)
# action_series = [[0.67], [0], [0], [0], [0.7]] # action_series = [[0.67], [0], [0], [0], [0.7]]
action_series = [3, 3, 3, 5, 5, 1, 1, 1, 0, 0, 0] # action_series = [3, 3, 3, 5, 5, 1, 1, 1, 0, 0, 0]
# action_series = [1] * 30 # action_series = [1] * 30
# action_series = [[0.2], [0.4], [0.7], [0.5]] # action_series = [[0.2], [0.4], [0.7], [0.5]]
# action_series = [[-0.08], [-0.08], [0], [0]] # action_series = [[-0.08], [-0.08], [0], [0]]
action_series = [3, 5, 3, 5, 3, 5, 3, 5, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4,
3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4]
for i in range(100): for i in range(100):
action = action_series[i] action = action_series[i]

View File

@ -12,7 +12,7 @@ random.seed(42)
# --------------------------- # ---------------------------
# 需要修改的超参数 # 需要修改的超参数
# --------------------------- # ---------------------------
num_iterations = 3000000000 num_iterations = 300000000
# 随机生成分区的行分段数与列分段数 # 随机生成分区的行分段数与列分段数
R = random.randint(0, 3) # 行分段数 R = random.randint(0, 3) # 行分段数
C = random.randint(0, 3) # 列分段数 C = random.randint(0, 3) # 列分段数
@ -48,7 +48,7 @@ best_solution = None
for iteration in tqdm(range(num_iterations), desc="蒙特卡洛模拟进度"): for iteration in tqdm(range(num_iterations), desc="蒙特卡洛模拟进度"):
# 直接切值 # 直接切值
# horiz = [random.random() for _ in range(R)] # horiz = [random.random() for _ in range(R)]
horiz = [random.randint(1, 999)/1000 for _ in range(R)] horiz = [random.randint(1, 99)/100 for _ in range(R)]
horiz = sorted(set(horiz)) horiz = sorted(set(horiz))
horiz = horiz if horiz else [] horiz = horiz if horiz else []
row_boundaries = [0] + horiz + [1] row_boundaries = [0] + horiz + [1]