修改dqn参数
This commit is contained in:
parent
981681c1bd
commit
0be9fa596a
2
.gitignore
vendored
2
.gitignore
vendored
@ -9,7 +9,7 @@ __pycache__/
|
||||
|
||||
# Pytorch weights
|
||||
model/
|
||||
logs/
|
||||
runs/
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
|
@ -18,7 +18,7 @@ parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--dvc', type=str, default='cpu',
|
||||
help='running device: cuda or cpu')
|
||||
parser.add_argument('--EnvIdex', type=int, default=0, help='CP-v1, LLd-v2')
|
||||
parser.add_argument('--write', type=str2bool, default=False,
|
||||
parser.add_argument('--write', type=str2bool, default=True,
|
||||
help='Use SummaryWriter to record the training')
|
||||
parser.add_argument('--render', type=str2bool,
|
||||
default=False, help='Render or Not')
|
||||
@ -31,10 +31,10 @@ parser.add_argument('--seed', type=int, default=0, help='random seed')
|
||||
parser.add_argument('--Max_train_steps', type=int,
|
||||
default=int(1e8), help='Max training steps')
|
||||
parser.add_argument('--save_interval', type=int,
|
||||
default=int(5e3), help='Model saving interval, in steps.')
|
||||
default=int(5e4), help='Model saving interval, in steps.')
|
||||
parser.add_argument('--eval_interval', type=int, default=int(2e3),
|
||||
help='Model evaluating interval, in steps.')
|
||||
parser.add_argument('--random_steps', type=int, default=int(3e3),
|
||||
parser.add_argument('--random_steps', type=int, default=int(6e3),
|
||||
help='steps for random policy to explore')
|
||||
parser.add_argument('--update_every', type=int,
|
||||
default=10, help='training frequency')
|
||||
@ -47,7 +47,7 @@ parser.add_argument('--lr', type=float, default=1e-4, help='Learning rate')
|
||||
parser.add_argument('--batch_size', type=int, default=256,
|
||||
help='lenth of sliced trajectory')
|
||||
parser.add_argument('--exp_noise', type=float,
|
||||
default=0.2, help='explore noise')
|
||||
default=0.3, help='explore noise')
|
||||
parser.add_argument('--noise_decay', type=float, default=0.99,
|
||||
help='decay rate of explore noise')
|
||||
parser.add_argument('--Double', type=str2bool, default=False,
|
||||
|
250
GA/GA_MTSP.py
Normal file
250
GA/GA_MTSP.py
Normal file
@ -0,0 +1,250 @@
|
||||
import numpy as np
|
||||
import random
|
||||
import math
|
||||
import matplotlib.pyplot as plt
|
||||
import time
|
||||
|
||||
|
||||
class MTSP_GA:
    """Genetic-algorithm solver for the multiple travelling salesman problem.

    An individual is a list of ``vehicle_num`` routes. Each route is a list of
    city indices (the origin excluded); every vehicle is assumed to leave the
    origin, visit its route in order and return to the origin.
    """

    def __init__(self, cities, vehicle_num, population_size=200, max_iterations=1500):
        """Initialise the solver.

        Args:
            cities: Sequence of (x, y) coordinates; the first city is the origin.
            vehicle_num: Number of vehicles (routes per individual).
            population_size: Number of individuals kept per generation.
            max_iterations: Maximum number of generations to run.

        Raises:
            ValueError: If ``vehicle_num`` or ``population_size`` is below 1.
        """
        if vehicle_num < 1:
            raise ValueError("vehicle_num must be at least 1")
        if population_size < 1:
            raise ValueError("population_size must be at least 1")

        self.cities = np.array(cities)
        self.city_count = len(cities)
        self.vehicle_num = vehicle_num
        self.origin = 0  # index of the start/end city

        # GA parameters
        self.population_size = population_size
        self.max_iterations = max_iterations
        self.retain_rate = 0.3    # fraction of the fittest that always survive
        self.random_rate = 0.5    # survival probability of the remaining individuals
        self.mutation_rate = 0.3  # probability that a child is mutated

        # Pairwise distances between all cities
        self.distance_matrix = self._compute_distance_matrix()

        # Convergence record; solve() appends one entry per generation
        self.distance_history = []
        self.best_path_history = []

    def _compute_distance_matrix(self):
        """Return the (city_count x city_count) Euclidean distance matrix.

        Only the first two coordinates of every city are used.
        """
        if self.city_count == 0:
            return np.zeros((0, 0))
        # Work in float so squared differences cannot overflow an integer dtype.
        coords = np.asarray(self.cities, dtype=float)[:, :2]
        deltas = coords[:, np.newaxis, :] - coords[np.newaxis, :, :]
        return np.hypot(deltas[..., 0], deltas[..., 1])

    def _split_routes(self, sequence):
        """Cut a flat city sequence into ``vehicle_num`` consecutive routes.

        Every route gets ``len(sequence) // vehicle_num`` cities; the last
        route additionally receives the remainder.
        """
        chunk = len(sequence) // self.vehicle_num
        last = self.vehicle_num - 1
        routes = [sequence[chunk * i:chunk * (i + 1)] for i in range(last)]
        routes.append(sequence[chunk * last:])
        return routes

    def _create_individual(self):
        """Create one random individual (a random permutation split into routes)."""
        index = [i for i in range(self.city_count) if i != self.origin]
        # Shuffle so the initial population is diverse instead of N copies of
        # the same individual.
        random.shuffle(index)
        return self._split_routes(index)

    def _get_total_distance(self, X):
        """Compute the travelled distance of an individual.

        Args:
            X: List of routes (lists of city indices).

        Returns:
            Tuple ``(total, per_route)`` where ``per_route[i]`` is the length
            of route ``i`` including the legs from and back to the origin.
            An empty route contributes 0.
        """
        distance = 0
        distance_list = []
        for x in X:
            if not x:
                # A vehicle without cities never leaves the origin.
                distance_list.append(0.0)
                continue
            d = self.distance_matrix[self.origin][x[0]]    # origin -> first city
            d += self.distance_matrix[self.origin][x[-1]]  # last city -> origin
            for a, b in zip(x, x[1:]):
                d += self.distance_matrix[a][b]
            distance += d
            distance_list.append(d)
        return distance, distance_list

    def _selection(self, population):
        """Select the parents of the next generation.

        The best ``retain_rate`` share always survives (at least two
        individuals when available, so crossover has two distinct parents);
        each remaining individual survives with probability ``random_rate``.
        """
        # Sort on the distance only, so ties never fall back to comparing paths.
        graded = sorted(population, key=lambda ind: self._get_total_distance(ind)[0])
        retain_length = max(int(len(graded) * self.retain_rate), min(2, len(graded)))
        parents = graded[:retain_length]
        parents.extend(
            ind for ind in graded[retain_length:]
            if random.random() < self.random_rate
        )
        return parents

    def _crossover(self, parents):
        """Breed children until the population is back at ``population_size``.

        A random slice of the first parent's flattened gene is moved to the
        end of the second parent's gene (with those cities removed), and the
        result is split into routes again.

        Raises:
            ValueError: If children are required but ``parents`` is empty.
        """
        target_count = self.population_size - len(parents)
        children = []
        if target_count <= 0:
            return children
        if not parents:
            raise ValueError("crossover needs at least one parent")

        while len(children) < target_count:
            if len(parents) >= 2:
                male_index, female_index = random.sample(range(len(parents)), 2)
            else:
                # A single parent can only be crossed with itself.
                male_index = female_index = 0

            gene1 = [city for route in parents[male_index] for city in route]
            gene2 = [city for route in parents[female_index] for city in route]
            if not gene1:
                # No cities besides the origin: every route stays empty.
                children.append(self._split_routes([]))
                continue

            left = random.randint(0, len(gene1) // 2)
            right = random.randint(left + 1, len(gene1))
            cut = gene1[left:right]
            cut_set = set(cut)
            remainder = [city for city in gene2 if city not in cut_set]
            children.append(self._split_routes(remainder + cut))
        return children

    def _mutation(self, children):
        """Mutate children in place and return the same list.

        With probability ``mutation_rate`` the routes of a child are taken in
        consecutive pairs; both routes of a pair are cut at a random interior
        point, the two heads form one new route and the two tails the other.
        Pairs in which a route has fewer than two cities are left untouched,
        because such a route has no interior cut point.
        """
        for child in children:
            if random.random() >= self.mutation_rate:
                continue
            for first in range(0, len(child) - 1, 2):
                route_a, route_b = child[first], child[first + 1]
                if len(route_a) < 2 or len(route_b) < 2:
                    continue
                u = random.randint(1, len(route_a) - 1)
                w = random.randint(1, len(route_b) - 1)
                child[first] = route_a[:u] + route_b[:w]
                child[first + 1] = route_a[u:] + route_b[w:]
        return children

    def _get_best_solution(self, population):
        """Return ``(distance, individual)`` of the shortest individual.

        Raises:
            ValueError: If ``population`` is empty.
        """
        if not population:
            raise ValueError("population is empty")
        graded = [(self._get_total_distance(x)[0], x) for x in population]
        best = min(graded, key=lambda pair: pair[0])
        return best[0], best[1]

    def solve(self, early_stop_threshold=100):
        """Run the genetic algorithm with early stopping.

        Args:
            early_stop_threshold: Stop once this many consecutive generations
                produced no better solution.

        Returns:
            Tuple ``(best_distance, best_path)`` of the best individual found.
            The per-generation best is appended to ``distance_history`` and
            ``best_path_history``.
        """
        # Initial population
        population = [self._create_individual() for _ in range(self.population_size)]

        # Early-stopping state
        best_distance = float('inf')
        best_path = None
        early_stop_counter = 0

        for i in range(self.max_iterations):
            parents = self._selection(population)
            children = self._crossover(parents)
            children = self._mutation(children)
            population = parents + children

            # Record the best individual of this generation
            current_distance, current_path = self._get_best_solution(population)
            self.distance_history.append(current_distance)
            self.best_path_history.append(current_path)

            if current_distance < best_distance:
                best_distance = current_distance
                best_path = current_path
                early_stop_counter = 0  # improvement: reset the counter
            else:
                early_stop_counter += 1

            if early_stop_counter >= early_stop_threshold:
                print(f"Early stopping at iteration {i} due to no improvement in {early_stop_threshold} iterations")
                break

        if best_path is None:
            # No generation was run (max_iterations <= 0): report the best
            # individual of the initial population instead of failing.
            best_distance, best_path = self._get_best_solution(population)

        return best_distance, best_path

    def plot_convergence(self):
        """Plot the best total distance of every recorded generation."""
        plt.plot(range(len(self.distance_history)), self.distance_history)
        plt.xlabel('Iteration')
        plt.ylabel('Total Distance')
        plt.title('Convergence Curve')
        plt.show()
|
||||
|
||||
|
||||
def main():
    """Solve a fixed 17-city, 4-vehicle instance and print/plot the result."""
    # City coordinates; the first entry is the starting point (base).
    coordinates = [
        (456, 320),
        (228, 0), (912, 0),
        (0, 80), (114, 80),
        (570, 160), (798, 160),
        (342, 240), (684, 240),
        (570, 400), (912, 400),
        (114, 480), (228, 480),
        (342, 560), (684, 560),
        (0, 640), (798, 640),
    ]
    cities = np.array(coordinates)

    # Seed both random generators.
    np.random.seed(42)
    random.seed(42)

    # Build the solver.
    solver = MTSP_GA(cities=cities, vehicle_num=4,
                     population_size=200, max_iterations=1500)

    # Solve and time the run.
    start_time = time.time()
    best_distance, best_path = solver.solve()
    end_time = time.time()

    # Report the result.
    print(f"最优总距离: {best_distance:.2f}")
    print("最优路径方案:")
    for i, path in enumerate(best_path):
        print(f"车辆{i+1}的路径: {path}")
    print(f"求解时间: {end_time - start_time:.2f}秒")

    # Show the convergence curve.
    solver.plot_convergence()
|
||||
|
||||
|
||||
# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
@ -9,10 +9,12 @@ state = env.reset()
|
||||
print('state:', state)
|
||||
|
||||
# action_series = [[0.67], [0], [0], [0], [0.7]]
|
||||
action_series = [3, 3, 3, 5, 5, 1, 1, 1, 0, 0, 0]
|
||||
# action_series = [3, 3, 3, 5, 5, 1, 1, 1, 0, 0, 0]
|
||||
# action_series = [1] * 30
|
||||
# action_series = [[0.2], [0.4], [0.7], [0.5]]
|
||||
# action_series = [[-0.08], [-0.08], [0], [0]]
|
||||
action_series = [3, 5, 3, 5, 3, 5, 3, 5, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4,
|
||||
3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4]
|
||||
|
||||
for i in range(100):
|
||||
action = action_series[i]
|
||||
|
@ -12,7 +12,7 @@ random.seed(42)
|
||||
# ---------------------------
|
||||
# 需要修改的超参数
|
||||
# ---------------------------
|
||||
num_iterations = 3000000000
|
||||
num_iterations = 300000000
|
||||
# 随机生成分区的行分段数与列分段数
|
||||
R = random.randint(0, 3) # 行分段数
|
||||
C = random.randint(0, 3) # 列分段数
|
||||
@ -48,7 +48,7 @@ best_solution = None
|
||||
for iteration in tqdm(range(num_iterations), desc="蒙特卡洛模拟进度"):
|
||||
# 直接切值
|
||||
# horiz = [random.random() for _ in range(R)]
|
||||
horiz = [random.randint(1, 999)/1000 for _ in range(R)]
|
||||
horiz = [random.randint(1, 99)/100 for _ in range(R)]
|
||||
horiz = sorted(set(horiz))
|
||||
horiz = horiz if horiz else []
|
||||
row_boundaries = [0] + horiz + [1]
|
||||
|
Loading…
Reference in New Issue
Block a user