加入q learning
This commit is contained in:
parent
6f44d142bc
commit
6f8fcd15b7
@ -20,7 +20,7 @@ best_col_boundaries = None
|
|||||||
# ---------------------------
|
# ---------------------------
|
||||||
R = 3
|
R = 3
|
||||||
C = 3
|
C = 3
|
||||||
params_file = 'params3'
|
params_file = 'params2'
|
||||||
|
|
||||||
|
|
||||||
with open(params_file + '.yml', 'r', encoding='utf-8') as file:
|
with open(params_file + '.yml', 'r', encoding='utf-8') as file:
|
||||||
|
@ -30,9 +30,9 @@ if __name__ == "__main__": # 重要:在 Windows 上必须加这一行
|
|||||||
# ---------------------------
|
# ---------------------------
|
||||||
# 需要修改的超参数
|
# 需要修改的超参数
|
||||||
# ---------------------------
|
# ---------------------------
|
||||||
R = 1
|
R = 3
|
||||||
C = 1
|
C = 3
|
||||||
params_file = 'params3'
|
params_file = 'params2'
|
||||||
batch_size = 60 # 控制一次最多并行多少个任务
|
batch_size = 60 # 控制一次最多并行多少个任务
|
||||||
|
|
||||||
with open(params_file + '.yml', 'r', encoding='utf-8') as file:
|
with open(params_file + '.yml', 'r', encoding='utf-8') as file:
|
||||||
|
@ -16,7 +16,8 @@ class SA_FineTuner:
|
|||||||
:param cooling_rate: 温度下降速率
|
:param cooling_rate: 温度下降速率
|
||||||
"""
|
"""
|
||||||
# 读取参数
|
# 读取参数
|
||||||
with open(params_file + '.yml', 'r', encoding='utf-8') as file:
|
self.params_file = params_file
|
||||||
|
with open(self.params_file + '.yml', 'r', encoding='utf-8') as file:
|
||||||
params = yaml.safe_load(file)
|
params = yaml.safe_load(file)
|
||||||
|
|
||||||
self.H = params['H']
|
self.H = params['H']
|
||||||
@ -91,6 +92,21 @@ class SA_FineTuner:
|
|||||||
delta = new_T - current_T
|
delta = new_T - current_T
|
||||||
acceptance_probability = math.exp(-delta / temperature)
|
acceptance_probability = math.exp(-delta / temperature)
|
||||||
return random.random() < acceptance_probability
|
return random.random() < acceptance_probability
|
||||||
|
|
||||||
|
def save_best_solution(self, row_cuts, col_cuts, car_paths):
|
||||||
|
"""
|
||||||
|
保存最佳方案
|
||||||
|
:param row_cuts: 行切分比例
|
||||||
|
:param col_cuts: 列切分比例
|
||||||
|
:param car_paths: 车队路径
|
||||||
|
"""
|
||||||
|
output_data = {
|
||||||
|
'row_boundaries': row_cuts,
|
||||||
|
'col_boundaries': col_cuts,
|
||||||
|
'car_paths': car_paths
|
||||||
|
}
|
||||||
|
with open(f'./solutions/finetune_{self.params_file}.json', 'w', encoding='utf-8') as file:
|
||||||
|
json.dump(output_data, file, ensure_ascii=False, indent=4)
|
||||||
|
|
||||||
def T_function(self, row_cuts, col_cuts):
|
def T_function(self, row_cuts, col_cuts):
|
||||||
"""
|
"""
|
||||||
@ -181,6 +197,9 @@ class SA_FineTuner:
|
|||||||
if iteration % 100 == 0:
|
if iteration % 100 == 0:
|
||||||
print(
|
print(
|
||||||
f"Iteration {iteration}: Best T = {self.best_T}, Temperature = {self.temperature}")
|
f"Iteration {iteration}: Best T = {self.best_T}, Temperature = {self.temperature}")
|
||||||
|
|
||||||
|
# 保存最佳方案
|
||||||
|
self.save_best_solution(self.best_row_cuts, self.best_col_cuts, self.car_paths)
|
||||||
|
|
||||||
return self.best_row_cuts, self.best_col_cuts, self.best_T
|
return self.best_row_cuts, self.best_col_cuts, self.best_T
|
||||||
|
|
||||||
@ -206,7 +225,7 @@ if __name__ == "__main__":
|
|||||||
# ---------------------------
|
# ---------------------------
|
||||||
# 需要修改的超参数
|
# 需要修改的超参数
|
||||||
# ---------------------------
|
# ---------------------------
|
||||||
solution_path = r"solutions\mtkl_params2.json"
|
solution_path = r"solutions\trav_ga_params2_parallel.json"
|
||||||
params_file = r"params2"
|
params_file = r"params2"
|
||||||
max_iterations=10000
|
max_iterations=10000
|
||||||
initial_temp=100
|
initial_temp=100
|
||||||
|
234
Q_learning/q_table.py
Normal file
234
Q_learning/q_table.py
Normal file
@ -0,0 +1,234 @@
|
|||||||
|
import random
|
||||||
|
import numpy as np
|
||||||
|
import json
|
||||||
|
import math
|
||||||
|
import yaml
|
||||||
|
# 参数设置
|
||||||
|
STEP = 0.01
|
||||||
|
VALUES = [round(i*STEP, 2) for i in range(101)] # 0.00~1.00
|
||||||
|
ACTION_DELTA = [STEP, -STEP] # 增加或减少 0.01
|
||||||
|
ACTIONS = [] # 每个动作为 (var_index, delta)
|
||||||
|
for i in range(3):
|
||||||
|
for delta in ACTION_DELTA:
|
||||||
|
ACTIONS.append((i, delta))
|
||||||
|
|
||||||
|
ALPHA = 0.1 # 学习率
|
||||||
|
GAMMA = 0.9 # 折扣因子
|
||||||
|
EPSILON = 0.2 # 探索率
|
||||||
|
NUM_EPISODES = 100
|
||||||
|
|
||||||
|
def f(state):
|
||||||
|
"""
|
||||||
|
计算切分比例的目标值 T(占位函数)
|
||||||
|
:param row_cuts: 行切分比例
|
||||||
|
:param col_cuts: 列切分比例
|
||||||
|
:return: 目标值 T
|
||||||
|
"""
|
||||||
|
with open('params2.yml', 'r', encoding='utf-8') as file:
|
||||||
|
params = yaml.safe_load(file)
|
||||||
|
|
||||||
|
H = params['H']
|
||||||
|
W = params['W']
|
||||||
|
num_cars = params['num_cars']
|
||||||
|
|
||||||
|
flight_time_factor = params['flight_time_factor']
|
||||||
|
comp_time_factor = params['comp_time_factor']
|
||||||
|
trans_time_factor = params['trans_time_factor']
|
||||||
|
car_time_factor = params['car_time_factor']
|
||||||
|
bs_time_factor = params['bs_time_factor']
|
||||||
|
|
||||||
|
flight_energy_factor = params['flight_energy_factor']
|
||||||
|
comp_energy_factor = params['comp_energy_factor']
|
||||||
|
trans_energy_factor = params['trans_energy_factor']
|
||||||
|
battery_energy_capacity = params['battery_energy_capacity']
|
||||||
|
|
||||||
|
col_cuts = list(state)
|
||||||
|
col_cuts.insert(0, 0)
|
||||||
|
col_cuts.append(1)
|
||||||
|
row_cuts = [0, 0.5, 1]
|
||||||
|
rectangles = []
|
||||||
|
for i in range(len(row_cuts) - 1):
|
||||||
|
for j in range(len(col_cuts) - 1):
|
||||||
|
d = (col_cuts[j+1] - col_cuts[j]) * W * \
|
||||||
|
(row_cuts[i+1] - row_cuts[i]) * H
|
||||||
|
rho_time_limit = (flight_time_factor - trans_time_factor) / \
|
||||||
|
(comp_time_factor - trans_time_factor)
|
||||||
|
rho_energy_limit = (battery_energy_capacity - flight_energy_factor * d - trans_energy_factor * d) / (comp_energy_factor * d - trans_energy_factor * d)
|
||||||
|
if rho_energy_limit < 0:
|
||||||
|
return 100000
|
||||||
|
rho = min(rho_time_limit, rho_energy_limit)
|
||||||
|
|
||||||
|
flight_time = flight_time_factor * d
|
||||||
|
bs_time = bs_time_factor * (1 - rho) * d
|
||||||
|
|
||||||
|
rectangles.append({
|
||||||
|
'flight_time': flight_time,
|
||||||
|
'bs_time': bs_time,
|
||||||
|
'center': ((row_cuts[i] + row_cuts[i+1]) / 2.0 * H,
|
||||||
|
(col_cuts[j] + col_cuts[j+1]) / 2.0 * W)
|
||||||
|
})
|
||||||
|
|
||||||
|
mortorcade_time_lt = []
|
||||||
|
for idx in range(num_cars):
|
||||||
|
car_path = car_paths[idx]
|
||||||
|
|
||||||
|
flight_time = sum(rectangles[point]['flight_time']
|
||||||
|
for point in car_path)
|
||||||
|
bs_time = sum(rectangles[point]['bs_time'] for point in car_path)
|
||||||
|
|
||||||
|
car_time = 0
|
||||||
|
for i in range(len(car_path) - 1):
|
||||||
|
first_point = car_path[i]
|
||||||
|
second_point = car_path[i + 1]
|
||||||
|
car_time += math.dist(
|
||||||
|
rectangles[first_point]['center'], rectangles[second_point]['center']) * car_time_factor
|
||||||
|
car_time += math.dist(rectangles[car_path[0]]['center'],
|
||||||
|
[H / 2, W / 2]) * car_time_factor
|
||||||
|
car_time += math.dist(rectangles[car_path[-1]]['center'],
|
||||||
|
[H / 2, W / 2]) * car_time_factor
|
||||||
|
mortorcade_time_lt.append(max(car_time + flight_time, bs_time))
|
||||||
|
|
||||||
|
return max(mortorcade_time_lt)
|
||||||
|
|
||||||
|
# 环境类:定义状态转移与奖励
|
||||||
|
class FunctionEnv:
|
||||||
|
def __init__(self, initial_state):
|
||||||
|
self.state = initial_state # 初始状态 (x1,x2,x3)
|
||||||
|
self.best_value = float('inf') # 记录最佳值
|
||||||
|
self.no_improvement_count = 0 # 记录连续未改善的次数
|
||||||
|
self.last_state = None # 记录上一个状态
|
||||||
|
self.min_improvement = 0.001 # 最小改善阈值
|
||||||
|
self.max_no_improvement = 10 # 最大允许连续未改善次数
|
||||||
|
self.target_threshold = 10000 # 目标函数值的可接受阈值
|
||||||
|
|
||||||
|
def step(self, action):
|
||||||
|
# action: (var_index, delta)
|
||||||
|
var_index, delta = action
|
||||||
|
new_state = list(self.state)
|
||||||
|
new_state[var_index] = round(new_state[var_index] + delta, 2)
|
||||||
|
# 保证取值在0-1范围内
|
||||||
|
if new_state[var_index] < 0 or new_state[var_index] > 1:
|
||||||
|
return self.state, -10000.0, True # episode结束
|
||||||
|
# 检查约束:x1 < x2 < x3
|
||||||
|
if not (0 < new_state[0] < new_state[1] < new_state[2] < 1):
|
||||||
|
return self.state, -10000.0, True
|
||||||
|
|
||||||
|
next_state = tuple(new_state)
|
||||||
|
current_value = f(next_state)
|
||||||
|
|
||||||
|
# 检查是否达到目标阈值
|
||||||
|
if current_value < self.target_threshold:
|
||||||
|
return next_state, 12000 - current_value, True
|
||||||
|
|
||||||
|
# 检查状态变化是否很小
|
||||||
|
if self.last_state is not None:
|
||||||
|
state_diff = sum(abs(a - b) for a, b in zip(next_state, self.last_state))
|
||||||
|
if state_diff < self.min_improvement:
|
||||||
|
self.no_improvement_count += 1
|
||||||
|
else:
|
||||||
|
self.no_improvement_count = 0
|
||||||
|
|
||||||
|
# 检查是否有改善
|
||||||
|
if current_value < self.best_value:
|
||||||
|
self.best_value = current_value
|
||||||
|
self.no_improvement_count = 0
|
||||||
|
else:
|
||||||
|
self.no_improvement_count += 1
|
||||||
|
|
||||||
|
# 如果连续多次没有改善,结束episode
|
||||||
|
if self.no_improvement_count >= self.max_no_improvement:
|
||||||
|
return next_state, 12000 - current_value, True
|
||||||
|
|
||||||
|
self.last_state = next_state
|
||||||
|
self.state = next_state
|
||||||
|
return next_state, 12000 - current_value, False
|
||||||
|
|
||||||
|
def reset(self, state):
|
||||||
|
self.state = state
|
||||||
|
return self.state
|
||||||
|
|
||||||
|
# 初始化 Q-table:使用字典表示,key 为状态 tuple,value 为 dict: action->Q值
|
||||||
|
Q_table = {}
|
||||||
|
|
||||||
|
def get_Q(state, action):
|
||||||
|
if state not in Q_table:
|
||||||
|
Q_table[state] = {a: 0.0 for a in ACTIONS}
|
||||||
|
return Q_table[state][action]
|
||||||
|
|
||||||
|
def set_Q(state, action, value):
|
||||||
|
if state not in Q_table:
|
||||||
|
Q_table[state] = {a: 0.0 for a in ACTIONS}
|
||||||
|
Q_table[state][action] = value
|
||||||
|
|
||||||
|
def choose_action(state, epsilon):
|
||||||
|
# ε-greedy 策略
|
||||||
|
if random.random() < epsilon:
|
||||||
|
return random.choice(ACTIONS)
|
||||||
|
else:
|
||||||
|
if state not in Q_table:
|
||||||
|
Q_table[state] = {a: 0.0 for a in ACTIONS}
|
||||||
|
# 返回Q值最大的动作
|
||||||
|
return max(Q_table[state].items(), key=lambda x: x[1])[0]
|
||||||
|
|
||||||
|
def load_initial_solution(file_path):
|
||||||
|
"""
|
||||||
|
从 JSON 文件加载初始解
|
||||||
|
:param file_path: JSON 文件路径
|
||||||
|
:return: 行切分比例、列切分比例
|
||||||
|
"""
|
||||||
|
with open(file_path, 'r', encoding='utf-8') as file:
|
||||||
|
data = json.load(file)
|
||||||
|
row_cuts = data['row_boundaries']
|
||||||
|
col_cuts = data['col_boundaries']
|
||||||
|
car_paths = data['car_paths']
|
||||||
|
return row_cuts, col_cuts, car_paths
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
random.seed(42)
|
||||||
|
|
||||||
|
# ---------------------------
|
||||||
|
# 需要修改的超参数
|
||||||
|
# ---------------------------
|
||||||
|
solution_path = r"solutions\trav_ga_params2_parallel.json"
|
||||||
|
params_file = r"params2"
|
||||||
|
|
||||||
|
initial_row_cuts, initial_col_cuts, car_paths = load_initial_solution(
|
||||||
|
solution_path)
|
||||||
|
|
||||||
|
initial_state = (0.2, 0.4, 0.7)
|
||||||
|
# Q-learning 主循环
|
||||||
|
env = FunctionEnv(initial_state)
|
||||||
|
|
||||||
|
for episode in range(NUM_EPISODES):
|
||||||
|
print(f"Episode {episode + 1} of {NUM_EPISODES}")
|
||||||
|
state = env.reset(initial_state)
|
||||||
|
done = False
|
||||||
|
|
||||||
|
while not done:
|
||||||
|
# 选择动作
|
||||||
|
action = choose_action(state, EPSILON)
|
||||||
|
# 环境执行动作
|
||||||
|
next_state, reward, done = env.step(action)
|
||||||
|
# Q-learning 更新:Q(s,a) = Q(s,a) + α [r + γ * max_a' Q(s', a') - Q(s,a)]
|
||||||
|
if next_state not in Q_table:
|
||||||
|
Q_table[next_state] = {a: 0.0 for a in ACTIONS}
|
||||||
|
max_next_Q = max(Q_table[next_state].values())
|
||||||
|
current_Q = get_Q(state, action)
|
||||||
|
new_Q = current_Q + ALPHA * (reward + GAMMA * max_next_Q - current_Q)
|
||||||
|
set_Q(state, action, new_Q)
|
||||||
|
state = next_state
|
||||||
|
|
||||||
|
# 可逐步减小探索率
|
||||||
|
EPSILON = max(0.01, EPSILON * 0.999)
|
||||||
|
|
||||||
|
# 输出 Q-table 中最佳策略的状态和值
|
||||||
|
best_state = None
|
||||||
|
best_value = float('inf')
|
||||||
|
for state in Q_table:
|
||||||
|
# 这里根据函数值来评价解的好坏
|
||||||
|
state_value = f(state)
|
||||||
|
if state_value < best_value:
|
||||||
|
best_value = state_value
|
||||||
|
best_state = state
|
||||||
|
|
||||||
|
print("找到的最优状态:", best_state, "对应函数值:", best_value)
|
@ -11,12 +11,12 @@ random.seed(42)
|
|||||||
# ---------------------------
|
# ---------------------------
|
||||||
# 需要修改的超参数
|
# 需要修改的超参数
|
||||||
# ---------------------------
|
# ---------------------------
|
||||||
num_iterations = 10000
|
num_iterations = 1000000
|
||||||
# 随机生成分区的行分段数与列分段数
|
# 随机生成分区的行分段数与列分段数
|
||||||
# R = random.randint(0, 3) # 行分段数
|
# R = random.randint(0, 3) # 行分段数
|
||||||
# C = random.randint(0, 3) # 列分段数
|
# C = random.randint(0, 3) # 列分段数
|
||||||
R = 3
|
R = 3
|
||||||
C = 1
|
C = 3
|
||||||
params_file = 'params2'
|
params_file = 'params2'
|
||||||
|
|
||||||
|
|
||||||
|
30
solutions/finetune_params2.json
Normal file
30
solutions/finetune_params2.json
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
{
|
||||||
|
"row_boundaries": [
|
||||||
|
0.0,
|
||||||
|
0.3000000000000001,
|
||||||
|
0.4800000000000001,
|
||||||
|
0.77,
|
||||||
|
1.0
|
||||||
|
],
|
||||||
|
"col_boundaries": [
|
||||||
|
0.0,
|
||||||
|
0.5,
|
||||||
|
1.0
|
||||||
|
],
|
||||||
|
"car_paths": [
|
||||||
|
[
|
||||||
|
1,
|
||||||
|
3,
|
||||||
|
5
|
||||||
|
],
|
||||||
|
[
|
||||||
|
4,
|
||||||
|
2,
|
||||||
|
0
|
||||||
|
],
|
||||||
|
[
|
||||||
|
7,
|
||||||
|
6
|
||||||
|
]
|
||||||
|
]
|
||||||
|
}
|
@ -1,30 +1,30 @@
|
|||||||
{
|
{
|
||||||
"row_boundaries": [
|
"row_boundaries": [
|
||||||
0.0,
|
0.0,
|
||||||
0.3,
|
0.5,
|
||||||
0.4,
|
|
||||||
0.7,
|
|
||||||
1.0
|
1.0
|
||||||
],
|
],
|
||||||
"col_boundaries": [
|
"col_boundaries": [
|
||||||
0.0,
|
0.0,
|
||||||
|
0.2,
|
||||||
0.5,
|
0.5,
|
||||||
|
0.8,
|
||||||
1.0
|
1.0
|
||||||
],
|
],
|
||||||
"car_paths": [
|
"car_paths": [
|
||||||
[
|
[
|
||||||
2,
|
2,
|
||||||
0
|
|
||||||
],
|
|
||||||
[
|
|
||||||
4,
|
|
||||||
5,
|
|
||||||
3,
|
3,
|
||||||
1
|
7
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
6,
|
1,
|
||||||
7
|
5,
|
||||||
|
6
|
||||||
|
],
|
||||||
|
[
|
||||||
|
0,
|
||||||
|
4
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
}
|
}
|
30
solutions/trav_ga_params2.json
Normal file
30
solutions/trav_ga_params2.json
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
{
|
||||||
|
"row_boundaries": [
|
||||||
|
0.0,
|
||||||
|
0.1,
|
||||||
|
0.4,
|
||||||
|
0.7,
|
||||||
|
1.0
|
||||||
|
],
|
||||||
|
"col_boundaries": [
|
||||||
|
0.0,
|
||||||
|
0.5,
|
||||||
|
1.0
|
||||||
|
],
|
||||||
|
"car_paths": [
|
||||||
|
[
|
||||||
|
0,
|
||||||
|
2,
|
||||||
|
4
|
||||||
|
],
|
||||||
|
[
|
||||||
|
5,
|
||||||
|
3,
|
||||||
|
1
|
||||||
|
],
|
||||||
|
[
|
||||||
|
7,
|
||||||
|
6
|
||||||
|
]
|
||||||
|
]
|
||||||
|
}
|
@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"row_boundaries": [
|
"row_boundaries": [
|
||||||
0.0,
|
0.0,
|
||||||
0.1,
|
0.2,
|
||||||
0.4,
|
0.4,
|
||||||
0.7,
|
0.7,
|
||||||
1.0
|
1.0
|
||||||
@ -12,16 +12,16 @@
|
|||||||
1.0
|
1.0
|
||||||
],
|
],
|
||||||
"car_paths": [
|
"car_paths": [
|
||||||
|
[
|
||||||
|
1,
|
||||||
|
3,
|
||||||
|
5
|
||||||
|
],
|
||||||
[
|
[
|
||||||
4,
|
4,
|
||||||
2,
|
2,
|
||||||
0
|
0
|
||||||
],
|
],
|
||||||
[
|
|
||||||
5,
|
|
||||||
3,
|
|
||||||
1
|
|
||||||
],
|
|
||||||
[
|
[
|
||||||
7,
|
7,
|
||||||
6
|
6
|
||||||
|
@ -53,7 +53,7 @@ if __name__ == "__main__":
|
|||||||
# 需要修改的超参数
|
# 需要修改的超参数
|
||||||
# ---------------------------
|
# ---------------------------
|
||||||
params_file = 'params2'
|
params_file = 'params2'
|
||||||
solution_file = r'solutions\mtkl_params2.json'
|
solution_file = r'solutions\trav_finetune_params2.json'
|
||||||
|
|
||||||
with open(params_file + '.yml', 'r', encoding='utf-8') as file:
|
with open(params_file + '.yml', 'r', encoding='utf-8') as file:
|
||||||
params = yaml.safe_load(file)
|
params = yaml.safe_load(file)
|
||||||
|
Loading…
Reference in New Issue
Block a user