修改dqn参数
This commit is contained in:
parent
981681c1bd
commit
0be9fa596a
2
.gitignore
vendored
2
.gitignore
vendored
@ -9,7 +9,7 @@ __pycache__/
|
|||||||
|
|
||||||
# Pytorch weights
|
# Pytorch weights
|
||||||
model/
|
model/
|
||||||
logs/
|
runs/
|
||||||
|
|
||||||
# Distribution / packaging
|
# Distribution / packaging
|
||||||
.Python
|
.Python
|
||||||
|
@ -18,7 +18,7 @@ parser = argparse.ArgumentParser()
|
|||||||
parser.add_argument('--dvc', type=str, default='cpu',
|
parser.add_argument('--dvc', type=str, default='cpu',
|
||||||
help='running device: cuda or cpu')
|
help='running device: cuda or cpu')
|
||||||
parser.add_argument('--EnvIdex', type=int, default=0, help='CP-v1, LLd-v2')
|
parser.add_argument('--EnvIdex', type=int, default=0, help='CP-v1, LLd-v2')
|
||||||
parser.add_argument('--write', type=str2bool, default=False,
|
parser.add_argument('--write', type=str2bool, default=True,
|
||||||
help='Use SummaryWriter to record the training')
|
help='Use SummaryWriter to record the training')
|
||||||
parser.add_argument('--render', type=str2bool,
|
parser.add_argument('--render', type=str2bool,
|
||||||
default=False, help='Render or Not')
|
default=False, help='Render or Not')
|
||||||
@ -31,10 +31,10 @@ parser.add_argument('--seed', type=int, default=0, help='random seed')
|
|||||||
parser.add_argument('--Max_train_steps', type=int,
|
parser.add_argument('--Max_train_steps', type=int,
|
||||||
default=int(1e8), help='Max training steps')
|
default=int(1e8), help='Max training steps')
|
||||||
parser.add_argument('--save_interval', type=int,
|
parser.add_argument('--save_interval', type=int,
|
||||||
default=int(5e3), help='Model saving interval, in steps.')
|
default=int(5e4), help='Model saving interval, in steps.')
|
||||||
parser.add_argument('--eval_interval', type=int, default=int(2e3),
|
parser.add_argument('--eval_interval', type=int, default=int(2e3),
|
||||||
help='Model evaluating interval, in steps.')
|
help='Model evaluating interval, in steps.')
|
||||||
parser.add_argument('--random_steps', type=int, default=int(3e3),
|
parser.add_argument('--random_steps', type=int, default=int(6e3),
|
||||||
help='steps for random policy to explore')
|
help='steps for random policy to explore')
|
||||||
parser.add_argument('--update_every', type=int,
|
parser.add_argument('--update_every', type=int,
|
||||||
default=10, help='training frequency')
|
default=10, help='training frequency')
|
||||||
@ -47,7 +47,7 @@ parser.add_argument('--lr', type=float, default=1e-4, help='Learning rate')
|
|||||||
parser.add_argument('--batch_size', type=int, default=256,
|
parser.add_argument('--batch_size', type=int, default=256,
|
||||||
help='lenth of sliced trajectory')
|
help='lenth of sliced trajectory')
|
||||||
parser.add_argument('--exp_noise', type=float,
|
parser.add_argument('--exp_noise', type=float,
|
||||||
default=0.2, help='explore noise')
|
default=0.3, help='explore noise')
|
||||||
parser.add_argument('--noise_decay', type=float, default=0.99,
|
parser.add_argument('--noise_decay', type=float, default=0.99,
|
||||||
help='decay rate of explore noise')
|
help='decay rate of explore noise')
|
||||||
parser.add_argument('--Double', type=str2bool, default=False,
|
parser.add_argument('--Double', type=str2bool, default=False,
|
||||||
|
250
GA/GA_MTSP.py
Normal file
250
GA/GA_MTSP.py
Normal file
@ -0,0 +1,250 @@
|
|||||||
|
import numpy as np
|
||||||
|
import random
|
||||||
|
import math
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import time
|
||||||
|
|
||||||
|
|
||||||
|
class MTSP_GA:
|
||||||
|
def __init__(self, cities, vehicle_num, population_size=200, max_iterations=1500):
|
||||||
|
"""
|
||||||
|
初始化遗传算法求解器
|
||||||
|
Args:
|
||||||
|
cities: 城市坐标数组,第一个城市为起始点
|
||||||
|
vehicle_num: 车辆数量
|
||||||
|
population_size: 种群大小
|
||||||
|
max_iterations: 最大迭代次数
|
||||||
|
"""
|
||||||
|
self.cities = np.array(cities)
|
||||||
|
self.city_count = len(cities)
|
||||||
|
self.vehicle_num = vehicle_num
|
||||||
|
self.origin = 0 # 起始点
|
||||||
|
|
||||||
|
# GA参数
|
||||||
|
self.population_size = population_size
|
||||||
|
self.max_iterations = max_iterations
|
||||||
|
self.retain_rate = 0.3 # 强者存活率
|
||||||
|
self.random_rate = 0.5 # 弱者存活概率
|
||||||
|
self.mutation_rate = 0.3 # 变异率
|
||||||
|
|
||||||
|
# 计算距离矩阵
|
||||||
|
self.distance_matrix = self._compute_distance_matrix()
|
||||||
|
|
||||||
|
# 记录收敛过程
|
||||||
|
self.distance_history = []
|
||||||
|
self.best_path_history = []
|
||||||
|
|
||||||
|
def _compute_distance_matrix(self):
|
||||||
|
"""计算城市间距离矩阵"""
|
||||||
|
distance = np.zeros((self.city_count, self.city_count))
|
||||||
|
for i in range(self.city_count):
|
||||||
|
for j in range(self.city_count):
|
||||||
|
distance[i][j] = math.sqrt(
|
||||||
|
(self.cities[i][0] - self.cities[j][0]) ** 2 +
|
||||||
|
(self.cities[i][1] - self.cities[j][1]) ** 2
|
||||||
|
)
|
||||||
|
return distance
|
||||||
|
|
||||||
|
def _create_individual(self):
|
||||||
|
"""生成初始个体"""
|
||||||
|
index = [i for i in range(self.city_count)]
|
||||||
|
index.remove(self.origin)
|
||||||
|
a = int(np.floor(len(index)/self.vehicle_num))
|
||||||
|
X = []
|
||||||
|
for i in range(self.vehicle_num):
|
||||||
|
if i < self.vehicle_num-1:
|
||||||
|
x = index[a*i:a*(i+1)]
|
||||||
|
else:
|
||||||
|
x = index[a*i:]
|
||||||
|
X.append(x)
|
||||||
|
return X
|
||||||
|
|
||||||
|
def _get_total_distance(self, X):
|
||||||
|
"""计算路径总距离"""
|
||||||
|
distance = 0
|
||||||
|
distance_list = []
|
||||||
|
for x in X:
|
||||||
|
d = self.distance_matrix[self.origin][x[0]] # 从起点到第一个城市
|
||||||
|
d += self.distance_matrix[self.origin][x[-1]] # 从最后一个城市返回起点
|
||||||
|
for i in range(len(x)-1):
|
||||||
|
d += self.distance_matrix[x[i]][x[i+1]]
|
||||||
|
distance += d
|
||||||
|
distance_list.append(d)
|
||||||
|
return distance, distance_list
|
||||||
|
|
||||||
|
def _selection(self, population):
|
||||||
|
"""选择操作"""
|
||||||
|
graded = [[self._get_total_distance(x)[0], x] for x in population]
|
||||||
|
graded = [x[1] for x in sorted(graded)]
|
||||||
|
retain_length = int(len(graded) * self.retain_rate)
|
||||||
|
parents = graded[:retain_length]
|
||||||
|
|
||||||
|
for chromosome in graded[retain_length:]:
|
||||||
|
if random.random() < self.random_rate:
|
||||||
|
parents.append(chromosome)
|
||||||
|
return parents
|
||||||
|
|
||||||
|
def _crossover(self, parents):
|
||||||
|
"""交叉操作"""
|
||||||
|
target_count = self.population_size - len(parents)
|
||||||
|
children = []
|
||||||
|
while len(children) < target_count:
|
||||||
|
male_index = random.randint(0, len(parents) - 1)
|
||||||
|
female_index = random.randint(0, len(parents) - 1)
|
||||||
|
if male_index != female_index:
|
||||||
|
male = parents[male_index]
|
||||||
|
female = parents[female_index]
|
||||||
|
|
||||||
|
gene1 = []
|
||||||
|
gene2 = []
|
||||||
|
for i in range(len(male)):
|
||||||
|
gene1 += male[i]
|
||||||
|
gene2 += female[i]
|
||||||
|
|
||||||
|
left = random.randint(0, len(gene1)//2)
|
||||||
|
right = random.randint(left + 1, len(gene1))
|
||||||
|
cut = gene1[left:right]
|
||||||
|
copy = gene2.copy()
|
||||||
|
for j in cut:
|
||||||
|
copy.remove(j)
|
||||||
|
|
||||||
|
child = copy + cut
|
||||||
|
a = int(np.floor(len(child)/self.vehicle_num))
|
||||||
|
child_c = []
|
||||||
|
for i in range(self.vehicle_num):
|
||||||
|
if i < self.vehicle_num - 1:
|
||||||
|
x = child[a * i:a * (i + 1)]
|
||||||
|
else:
|
||||||
|
x = child[a * i:]
|
||||||
|
child_c.append(x)
|
||||||
|
children.append(child_c)
|
||||||
|
return children
|
||||||
|
|
||||||
|
def _mutation(self, children):
|
||||||
|
"""变异操作"""
|
||||||
|
for i in range(len(children)):
|
||||||
|
if random.random() < self.mutation_rate:
|
||||||
|
child = children[i]
|
||||||
|
for j in range(int(np.floor(len(child)/2))):
|
||||||
|
a = 2*j
|
||||||
|
u = random.randint(1, len(child[a]) - 1)
|
||||||
|
w = random.randint(1, len(child[a+1]) - 1)
|
||||||
|
child_1 = child[a][:u].copy()
|
||||||
|
child_2 = child[a][u:].copy()
|
||||||
|
child_3 = child[a+1][:w].copy()
|
||||||
|
child_4 = child[a+1][w:].copy()
|
||||||
|
child_a = child_1+child_3
|
||||||
|
child_b = child_2+child_4
|
||||||
|
child[a] = child_a
|
||||||
|
child[a+1] = child_b
|
||||||
|
children[i] = child.copy()
|
||||||
|
return children
|
||||||
|
|
||||||
|
def _get_best_solution(self, population):
|
||||||
|
"""获取最优解"""
|
||||||
|
graded = [[self._get_total_distance(x)[0], x] for x in population]
|
||||||
|
graded = sorted(graded, key=lambda x: x[0])
|
||||||
|
return graded[0][0], graded[0][1]
|
||||||
|
|
||||||
|
def solve(self):
|
||||||
|
"""
|
||||||
|
求解MTSP,加入早停机制
|
||||||
|
当连续50轮没有更好的解时停止迭代
|
||||||
|
"""
|
||||||
|
# 初始化种群
|
||||||
|
population = [self._create_individual() for _ in range(self.population_size)]
|
||||||
|
|
||||||
|
# 初始化早停相关变量
|
||||||
|
best_distance = float('inf')
|
||||||
|
early_stop_counter = 0
|
||||||
|
early_stop_threshold = 100
|
||||||
|
|
||||||
|
# 迭代优化
|
||||||
|
for i in range(self.max_iterations):
|
||||||
|
parents = self._selection(population)
|
||||||
|
children = self._crossover(parents)
|
||||||
|
children = self._mutation(children)
|
||||||
|
population = parents + children
|
||||||
|
|
||||||
|
# 记录当前最优解
|
||||||
|
current_distance, current_path = self._get_best_solution(population)
|
||||||
|
self.distance_history.append(current_distance)
|
||||||
|
self.best_path_history.append(current_path)
|
||||||
|
|
||||||
|
# 早停判断
|
||||||
|
if current_distance < best_distance:
|
||||||
|
best_distance = current_distance
|
||||||
|
best_path = current_path
|
||||||
|
early_stop_counter = 0 # 重置计数器
|
||||||
|
else:
|
||||||
|
early_stop_counter += 1
|
||||||
|
|
||||||
|
# 如果连续50轮没有更好的解,则停止迭代
|
||||||
|
if early_stop_counter >= early_stop_threshold:
|
||||||
|
print(f"Early stopping at iteration {i} due to no improvement in {early_stop_threshold} iterations")
|
||||||
|
break
|
||||||
|
|
||||||
|
# 返回最优解
|
||||||
|
return best_distance, best_path
|
||||||
|
|
||||||
|
def plot_convergence(self):
|
||||||
|
"""绘制收敛曲线"""
|
||||||
|
plt.plot(range(len(self.distance_history)), self.distance_history)
|
||||||
|
plt.xlabel('Iteration')
|
||||||
|
plt.ylabel('Total Distance')
|
||||||
|
plt.title('Convergence Curve')
|
||||||
|
plt.show()
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
# 城市坐标
|
||||||
|
cities = np.array([
|
||||||
|
(456, 320), # 起点(基地)
|
||||||
|
(228, 0),
|
||||||
|
(912, 0),
|
||||||
|
(0, 80),
|
||||||
|
(114, 80),
|
||||||
|
(570, 160),
|
||||||
|
(798, 160),
|
||||||
|
(342, 240),
|
||||||
|
(684, 240),
|
||||||
|
(570, 400),
|
||||||
|
(912, 400),
|
||||||
|
(114, 480),
|
||||||
|
(228, 480),
|
||||||
|
(342, 560),
|
||||||
|
(684, 560),
|
||||||
|
(0, 640),
|
||||||
|
(798, 640)
|
||||||
|
])
|
||||||
|
|
||||||
|
# 设置随机种子
|
||||||
|
np.random.seed(42)
|
||||||
|
random.seed(42)
|
||||||
|
|
||||||
|
# 创建求解器实例
|
||||||
|
solver = MTSP_GA(
|
||||||
|
cities=cities,
|
||||||
|
vehicle_num=4,
|
||||||
|
population_size=200,
|
||||||
|
max_iterations=1500
|
||||||
|
)
|
||||||
|
|
||||||
|
# 求解
|
||||||
|
start_time = time.time()
|
||||||
|
best_distance, best_path = solver.solve()
|
||||||
|
end_time = time.time()
|
||||||
|
|
||||||
|
# 输出结果
|
||||||
|
print(f"最优总距离: {best_distance:.2f}")
|
||||||
|
print("最优路径方案:")
|
||||||
|
for i, path in enumerate(best_path):
|
||||||
|
print(f"车辆{i+1}的路径: {path}")
|
||||||
|
print(f"求解时间: {end_time - start_time:.2f}秒")
|
||||||
|
|
||||||
|
# 绘制收敛曲线
|
||||||
|
solver.plot_convergence()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
@ -9,10 +9,12 @@ state = env.reset()
|
|||||||
print('state:', state)
|
print('state:', state)
|
||||||
|
|
||||||
# action_series = [[0.67], [0], [0], [0], [0.7]]
|
# action_series = [[0.67], [0], [0], [0], [0.7]]
|
||||||
action_series = [3, 3, 3, 5, 5, 1, 1, 1, 0, 0, 0]
|
# action_series = [3, 3, 3, 5, 5, 1, 1, 1, 0, 0, 0]
|
||||||
# action_series = [1] * 30
|
# action_series = [1] * 30
|
||||||
# action_series = [[0.2], [0.4], [0.7], [0.5]]
|
# action_series = [[0.2], [0.4], [0.7], [0.5]]
|
||||||
# action_series = [[-0.08], [-0.08], [0], [0]]
|
# action_series = [[-0.08], [-0.08], [0], [0]]
|
||||||
|
action_series = [3, 5, 3, 5, 3, 5, 3, 5, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4,
|
||||||
|
3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4]
|
||||||
|
|
||||||
for i in range(100):
|
for i in range(100):
|
||||||
action = action_series[i]
|
action = action_series[i]
|
||||||
|
@ -12,7 +12,7 @@ random.seed(42)
|
|||||||
# ---------------------------
|
# ---------------------------
|
||||||
# 需要修改的超参数
|
# 需要修改的超参数
|
||||||
# ---------------------------
|
# ---------------------------
|
||||||
num_iterations = 3000000000
|
num_iterations = 300000000
|
||||||
# 随机生成分区的行分段数与列分段数
|
# 随机生成分区的行分段数与列分段数
|
||||||
R = random.randint(0, 3) # 行分段数
|
R = random.randint(0, 3) # 行分段数
|
||||||
C = random.randint(0, 3) # 列分段数
|
C = random.randint(0, 3) # 列分段数
|
||||||
@ -48,7 +48,7 @@ best_solution = None
|
|||||||
for iteration in tqdm(range(num_iterations), desc="蒙特卡洛模拟进度"):
|
for iteration in tqdm(range(num_iterations), desc="蒙特卡洛模拟进度"):
|
||||||
# 直接切值
|
# 直接切值
|
||||||
# horiz = [random.random() for _ in range(R)]
|
# horiz = [random.random() for _ in range(R)]
|
||||||
horiz = [random.randint(1, 999)/1000 for _ in range(R)]
|
horiz = [random.randint(1, 99)/100 for _ in range(R)]
|
||||||
horiz = sorted(set(horiz))
|
horiz = sorted(set(horiz))
|
||||||
horiz = horiz if horiz else []
|
horiz = horiz if horiz else []
|
||||||
row_boundaries = [0] + horiz + [1]
|
row_boundaries = [0] + horiz + [1]
|
||||||
|
Loading…
Reference in New Issue
Block a user