Fine-tune partitioning

weixin_46229132 2025-03-29 16:28:30 +08:00
parent f05f8400fb
commit f347ca8276
5 changed files with 128 additions and 120 deletions


@@ -10,6 +10,7 @@ import sys
 import os
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from env_partion import PartitionEnv
+# from env import PartitionMazeEnv
 # fmt: on
 '''Hyperparameter Setting'''
@@ -18,7 +19,7 @@ parser.add_argument('--dvc', type=str, default='cpu',
                     help='running device: cuda or cpu')
 parser.add_argument('--EnvIdex', type=int, default=0,
                     help='PM_PPO_Con, PV1, Lch_Cv2, Humanv4, HCv4, BWv3, BWHv3')
-parser.add_argument('--write', type=str2bool, default=True,
+parser.add_argument('--write', type=str2bool, default=False,
                     help='Use SummaryWriter to record the training')
 parser.add_argument('--render', type=str2bool,
                     default=False, help='Render or Not')
@@ -28,7 +29,7 @@ parser.add_argument('--ModelIdex', type=int, default=500,
                     help='which model to load')
 parser.add_argument('--seed', type=int, default=0, help='random seed')
-parser.add_argument('--T_horizon', type=int, default=20,
+parser.add_argument('--T_horizon', type=int, default=15,
                     help='lenth of long trajectory')
 parser.add_argument('--Distribution', type=str, default='Beta',
                     help='Should be one of Beta ; GS_ms  ;  GS_m')
@@ -36,7 +37,7 @@ parser.add_argument('--Max_train_steps', type=int,
                     default=int(5e8), help='Max training steps')
 parser.add_argument('--save_interval', type=int,
                     default=int(5e5), help='Model saving interval, in steps.')
-parser.add_argument('--eval_interval', type=int, default=int(5e1),
+parser.add_argument('--eval_interval', type=int, default=int(5e3),
                     help='Model evaluating interval, in steps.')
 parser.add_argument('--gamma', type=float, default=0.99,
@@ -74,10 +75,10 @@ def main():
                'Humanv4', 'HCv4', 'BWv3', 'BWHv3']
     # Build Env
     # env = gym.make(EnvName[opt.EnvIdex], render_mode = "human" if opt.render else None)
     env = PartitionEnv()
     # eval_env = gym.make(EnvName[opt.EnvIdex])
+    # env = PartitionMazeEnv()
     eval_env = PartitionEnv()
+    # eval_env = PartitionMazeEnv()
     opt.state_dim = env.observation_space.shape[0]
     opt.action_dim = env.action_space.shape[0]
     opt.max_action = float(env.action_space.high[0])
@@ -129,9 +130,9 @@ def main():
             '''Interact with Env'''
             a, logprob_a = agent.select_action(
                 s, deterministic=False)  # use stochastic when training
-            # act = Action_adapter(a,opt.max_action) #[0,1] to [-max,max]
+            act = Action_adapter(a,opt.max_action) #[0,1] to [-max,max]
             s_next, r, dw, tr, info = env.step(
-                a)  # dw: dead&win; tr: truncated
+                act)  # dw: dead&win; tr: truncated
             # r = Reward_adapter(r, opt.EnvIdex)
             done = (dw or tr)
@@ -152,6 +153,7 @@ def main():
                 # evaluate the policy for 3 times, and get averaged result
                 score = evaluate_policy(
                     eval_env, agent, opt.max_action, turns=1)
+                # TODO: save the new path
                 if opt.write:
                     writer.add_scalar(
                         'ep_r', score, global_step=total_steps)

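With `Action_adapter` now active in the training loop, the Beta policy's raw sample in [0,1] is mapped onto the environment's action range before `env.step` sees it. The adapter itself is defined elsewhere in the repo and not shown in this diff; a minimal sketch of the usual affine mapping, assuming that is what it does:

```python
import numpy as np

def action_adapter(a: np.ndarray, max_action: float) -> np.ndarray:
    # affine map [0, 1] -> [-max_action, max_action]; a hypothetical stand-in
    # for the repo's Action_adapter, which this diff only calls, not defines
    return 2.0 * (a - 0.5) * max_action

# e.g. a Beta sample of 0.25 with max_action = 0.1 becomes an adjustment of -0.05
print(action_adapter(np.array([0.25]), 0.1))  # [-0.05]
```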

@@ -143,10 +143,10 @@ def evaluate_policy(env, agent, max_action, turns):
         while not done:
             # Take deterministic actions when evaluation
             a, logprob_a = agent.select_action(s, deterministic=True)
-            # act = Action_adapter(a, max_action)  # [0,1] to [-max,max]
-            s_next, r, dw, tr, info = env.step(a)
+            act = Action_adapter(a, max_action)  # [0,1] to [-max,max]
+            s_next, r, dw, tr, info = env.step(act)
             done = (dw or tr)
-            action_series.append(a[0])
+            action_series.append(act[0])
             total_scores += r
             s = s_next
     print('action series: ', np.round(action_series, 3))

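Only the middle of `evaluate_policy` is visible in this hunk. For context, a sketch of the whole evaluation loop consistent with those lines, with the episode scaffolding assumed and the [0,1] to [-max, max] mapping inlined:

```python
import numpy as np

def evaluate_policy(env, agent, max_action, turns):
    total_scores = 0.0
    for _ in range(turns):
        s = env.reset()          # PartitionEnv.reset returns just the state
        done = False
        action_series = []
        while not done:
            # deterministic actions during evaluation
            a, logprob_a = agent.select_action(s, deterministic=True)
            act = 2.0 * (a - 0.5) * max_action   # assumed Action_adapter behaviour
            s_next, r, dw, tr, info = env.step(act)
            done = (dw or tr)
            action_series.append(act[0])
            total_scores += r
            s = s_next
        print('action series: ', np.round(action_series, 3))
    return total_scores / turns
```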
env.py

@@ -39,9 +39,9 @@ class PartitionMazeEnv(gym.Env):
         ##############################
         # hyperparameters that may need manual tuning
         ##############################
-        self.CUT_NUM = 4   # half horizontal cuts, half vertical cuts
-        self.BASE_LINE = 3500  # baseline time, computed via greedy or Monte Carlo
-        self.MAX_STEPS = 10  # upper limit on maze-walk steps
+        self.CUT_NUM = 6   # half horizontal cuts, half vertical cuts
+        self.BASE_LINE = 10000  # baseline time, computed via greedy or Monte Carlo
+        self.MAX_STEPS = 20  # upper limit on maze-walk steps
         self.phase = 0  # phase control: 0 = partitioning, 1 = maze init, 2 = maze walking
         self.partition_step = 0  # partition-phase step counter, range 0~4
@@ -172,6 +172,7 @@ class PartitionMazeEnv(gym.Env):
             ])
             return state, reward, True, False, {}
         else:
+            print(self.partition_values)
             # enter phase 1: initialize the maze
             self.phase = 1
             reward = 0.2

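The `PartitionMazeEnv` tweak enlarges the instance: `CUT_NUM = 6` means three horizontal and three vertical interior cuts, i.e. up to a 4 x 4 grid of sub-regions, with `BASE_LINE` and `MAX_STEPS` raised to match. A small sketch of how cut positions translate into region areas, using illustrative values (the real `W`, `H` come from the params YAML):

```python
# three interior cuts per axis (CUT_NUM = 6) -> up to 4 x 4 regions
row_cuts = [0.0, 0.25, 0.5, 0.75, 1.0]   # hypothetical; boundaries include 0 and 1
col_cuts = [0.0, 0.3, 0.6, 0.9, 1.0]
W, H = 50.0, 30.0                        # illustrative map size

# area of region (i, j), mirroring the d computed in the env's step()
areas = [
    (col_cuts[j + 1] - col_cuts[j]) * W * (row_cuts[i + 1] - row_cuts[i]) * H
    for i in range(len(row_cuts) - 1)
    for j in range(len(col_cuts) - 1)
]
assert len(areas) == 16 and abs(sum(areas) - W * H) < 1e-9  # regions tile the map
```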

@@ -19,12 +19,28 @@ class PartitionEnv(gym.Env):
         # hyperparameters that may need manual tuning
         ##############################
         self.params = 'params2'
+        self.ORI_ROW_CUTS = [0, 0.2, 0.4, 0.7, 1]
+        self.ORI_COL_CUTS = [0, 0.5, 1]
+        self.CUT_NUM = 4
+        self.ROW_CUT_LIMIT = 3
+        self.COL_CUT_LIMIT = 1
+        self.BASE_LINE = 10000
+        self.mTSP_STEPS = 10000
+        # define the action space: every action is a 1-D continuous value in [0,1]
+        self.action_space = spaces.Box(
+            low=0.0, high=1.0, shape=(1,), dtype=np.float32)
+        # define the observation space as an 8-D vector
+        # the first 4 dims are the decided cut values (undecided entries are 0)
+        self.observation_space = spaces.Box(
+            low=0.0, high=1.0, shape=(self.CUT_NUM + 4,), dtype=np.float32)
+        self.partition_step = 0
+        self.ori_row_cuts = self.ORI_ROW_CUTS[:]
+        self.ori_col_cuts = self.ORI_COL_CUTS[:]
+        self.rectangles = []
         # fleet parameter settings
         with open(self.params + '.yml', 'r', encoding='utf-8') as file:
             params = yaml.safe_load(file)
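The rewritten `PartitionEnv` no longer places cuts from scratch: each of the four steps nudges one preset cut from `ORI_ROW_CUTS`/`ORI_COL_CUTS` by the action value, and a nudge is only valid while the cut stays strictly between its two neighbours (see the `valid_adjust` logic in the next hunk). A standalone sketch of that rule:

```python
def apply_adjust(cuts, idx, adjust):
    # nudge interior cut `idx`; like PartitionEnv.step, the new value is
    # written back even when it violates the ordering constraint
    new_cut = cuts[idx] + adjust
    cuts[idx] = new_cut
    return cuts[idx - 1] < new_cut < cuts[idx + 1]

cuts = [0, 0.2, 0.4, 0.7, 1]            # ORI_ROW_CUTS from this commit
assert apply_adjust(cuts, 1, -0.1)      # 0.2 -> 0.1, still inside (0, 0.4)
assert not apply_adjust(cuts, 2, 0.5)   # 0.4 -> 0.9, outside (0.1, 0.7): reward -100
```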
@@ -45,140 +61,129 @@ class PartitionEnv(gym.Env):
         self.trans_energy_factor = params['trans_energy_factor']
         self.battery_energy_capacity = params['battery_energy_capacity']
-        self.partition_step = 0  # partition-phase step counter, range 0~4
-        self.partition_values = np.zeros(
-            self.CUT_NUM, dtype=np.float32)  # stores c₁, c₂, r₁, r₂
-        # define the action space: every action is a 1-D continuous value in [0,1]
-        self.action_space = spaces.Box(
-            low=0.0, high=1.0, shape=(1,), dtype=np.float32)
-        # define the observation space as an 8-D vector
-        # the first 4 dims are the decided cut values (undecided entries are 0)
-        self.observation_space = spaces.Box(
-            low=0.0, high=1.0, shape=(self.CUT_NUM,), dtype=np.float32)
-        # partition-phase variables
-        self.col_cuts = []  # vertical cut positions c₁, c₂ (0 means no cut)
-        self.row_cuts = []  # horizontal cut positions r₁, r₂
-        self.rectangles = []

     def reset(self, seed=None, options=None):
         # reset all variables, back to the partition phase (phase 0)
         self.phase = 0
         self.partition_step = 0
-        self.partition_values = np.zeros(self.CUT_NUM, dtype=np.float32)
-        self.col_cuts = []
-        self.row_cuts = []
+        self.ori_row_cuts = self.ORI_ROW_CUTS[:]
+        self.ori_col_cuts = self.ORI_COL_CUTS[:]
         self.rectangles = []
-        # state: first 4 dims are partition_values, the rest are region-visit flags (all 0 at start)
-        state = self.partition_values
+        state = np.array(self.ori_row_cuts + self.ori_col_cuts)
         return state
     def step(self, action):
         # the action is a 1-D continuous value; take action[0]
-        a = float(action[0])
-        self.partition_values[self.partition_step] = a
+        adjust = float(action[0])
+        valid_adjust = True
+        if self.partition_step < self.ROW_CUT_LIMIT:
+            row_cut = self.ori_row_cuts[self.partition_step + 1]
+            new_row_cut = row_cut + adjust
+            self.ori_row_cuts[self.partition_step + 1] = new_row_cut
+            if self.ori_row_cuts[self.partition_step] < new_row_cut < self.ori_row_cuts[self.partition_step + 2]:
+                pass
+            else:
+                valid_adjust = False
+                reward = -100
+        else:
+            col_idx = self.partition_step - self.ROW_CUT_LIMIT
+            col_cut = self.ori_col_cuts[col_idx + 1]
+            new_col_cut = col_cut + adjust
+            self.ori_col_cuts[col_idx + 1] = new_col_cut
+            if self.ori_col_cuts[col_idx] < new_col_cut < self.ori_col_cuts[col_idx + 2]:
+                pass
+            else:
+                valid_adjust = False
+                reward = -100
         self.partition_step += 1
-        # build the current state: the first partition_step entries are decided values, the rest 0 (then pad with 7 zeros)
-        state = self.partition_values
+        state = np.array(self.ori_row_cuts + self.ori_col_cuts)
-        # fewer than 4 steps: still in the partition phase, no reward, done = False
-        if self.partition_step < self.CUT_NUM:
-            return state, 0.0, False, False, {}
+        # an invalid adjustment ends the episode immediately
+        if not valid_adjust:
+            return state, reward, True, False, {}
-        else:
-            # after the 4 steps, compute the cut boundaries
-            # filter out zeros, then dedupe and sort
-            rows = sorted(
-                set(v for v in self.partition_values[:self.ROW_CUT_LIMIT] if v > 0))
-            cols = sorted(
-                set(v for v in self.partition_values[self.ROW_CUT_LIMIT:] if v > 0))
-            rows = rows if rows else []
-            cols = cols if cols else []
-            # boundaries: always include 0 and 1
-            self.row_cuts = [0.0] + rows + [1.0]
-            self.col_cuts = [0.0] + cols + [1.0]
-            # check that the partition is reasonable and compute each region's task offload ratio ρ
-            valid_partition = True
-            for i in range(len(self.row_cuts) - 1):
-                for j in range(len(self.col_cuts) - 1):
-                    d = (self.col_cuts[j+1] - self.col_cuts[j]) * self.W * \
-                        (self.row_cuts[i+1] - self.row_cuts[i]) * self.H
-                    rho_time_limit = (self.flight_time_factor - self.trans_time_factor) / \
-                        (self.comp_time_factor - self.trans_time_factor)
-                    rho_energy_limit = (self.battery_energy_capacity - self.flight_energy_factor * d - self.trans_energy_factor * d) / \
-                        (self.comp_energy_factor * d -
-                         self.trans_energy_factor * d)
-                    if rho_energy_limit < 0:
-                        valid_partition = False
-                        break
-                    rho = min(rho_time_limit, rho_energy_limit)
-                    flight_time = self.flight_time_factor * d
-                    bs_time = self.bs_time_factor * (1 - rho) * d
-                    self.rectangles.append({
-                        'center': ((self.row_cuts[i] + self.row_cuts[i+1]) * self.H / 2, (self.col_cuts[j+1] + self.col_cuts[j]) * self.W / 2),
-                        'flight_time': flight_time,
-                        'bs_time': bs_time,
-                    })
-                if not valid_partition:
-                    break
-            if not valid_partition:
-                reward = -100
-                state = self.partition_values
-                return state, reward, True, False, {}
+        if self.partition_step < self.CUT_NUM:
+            return state, 0.0, False, False, {}
+        else:
-            reward = 0
-            state = self.partition_values
+            # after the 4 steps, check the partition and compute each region's task offload ratio ρ
+            valid_partition = True
+            for i in range(len(self.ori_row_cuts) - 1):
+                for j in range(len(self.ori_col_cuts) - 1):
+                    d = (self.ori_col_cuts[j+1] - self.ori_col_cuts[j]) * self.W * \
+                        (self.ori_row_cuts[i+1] -
+                         self.ori_row_cuts[i]) * self.H
+                    rho_time_limit = (self.flight_time_factor - self.trans_time_factor) / \
+                        (self.comp_time_factor - self.trans_time_factor)
+                    rho_energy_limit = (self.battery_energy_capacity - self.flight_energy_factor * d - self.trans_energy_factor * d) / \
+                        (self.comp_energy_factor * d -
+                         self.trans_energy_factor * d)
+                    if rho_energy_limit < 0:
+                        valid_partition = False
+                        break
+                    rho = min(rho_time_limit, rho_energy_limit)
-            # continue with path planning
-            # solve the multi-traveling-salesman problem with q_learning
-            # cities: [[x1, x2, x3...], [y1, y2, y3...]] city coordinates
-            # rec_center_lt = [rec_info['center']
-            #                  for rec_info in self.rectangles]
-            # cities = np.column_stack(rec_center_lt)
-            # cities = np.column_stack((self.center, cities))
+                    flight_time = self.flight_time_factor * d
+                    bs_time = self.bs_time_factor * (1 - rho) * d
-            # center_idx = []
-            # for i in range(self.num_cars - 1):
-            #     cities = np.column_stack((cities, self.center))
-            #     center_idx.append(cities.shape[1] - 1)
+                    self.rectangles.append({
+                        'center': ((self.ori_row_cuts[i] + self.ori_row_cuts[i+1]) * self.H / 2, (self.ori_col_cuts[j+1] + self.ori_col_cuts[j]) * self.W / 2),
+                        'flight_time': flight_time,
+                        'bs_time': bs_time,
+                    })
+                if not valid_partition:
+                    break
-            # tsp = mTSP(params=self.params, num_cities=cities.shape[1], cities=cities, num_cars=self.num_cars,
-            #            center_idx=center_idx, rectangles=self.rectangles)
+            if not valid_partition:
+                reward = -10
+                return state, reward, True, False, {}
+            else:
+                # continue with path planning
+                # solve the multi-traveling-salesman problem with q_learning
+                # cities: [[x1, x2, x3...], [y1, y2, y3...]] city coordinates
+                # rec_center_lt = [rec_info['center']
+                #                  for rec_info in self.rectangles]
+                # cities = np.column_stack(rec_center_lt)
+                # cities = np.column_stack((self.center, cities))
-            # best_time, best_path = tsp.train(self.mTSP_STEPS)
+                # center_idx = []
+                # for i in range(self.num_cars - 1):
+                #     cities = np.column_stack((cities, self.center))
+                #     center_idx.append(cities.shape[1] - 1)
-            # solve the multi-traveling-salesman problem with a genetic algorithm
-            cities = [self.center]
-            for rec in self.rectangles:
-                cities.append(rec['center'])
-            cities = np.array(cities)
+                # tsp = mTSP(params=self.params, num_cities=cities.shape[1], cities=cities, num_cars=self.num_cars,
+                #            center_idx=center_idx, rectangles=self.rectangles)
-            center_idx = [0]
-            for i in range(self.num_cars - 1):
-                cities = np.row_stack((cities, self.center))
-                center_idx.append(cities.shape[0] - 1)
+                # best_time, best_path = tsp.train(self.mTSP_STEPS)
-            ga = GA(num_drones=self.num_cars, num_city=cities.shape[0], num_total=20,
-                    data=cities, to_process_idx=center_idx, rectangles=self.rectangles)
+                # solve the multi-traveling-salesman problem with a genetic algorithm
+                cities = [self.center]
+                for rec in self.rectangles:
+                    cities.append(rec['center'])
+                cities = np.array(cities)
-            best_path, best_time = ga.run()
+                center_idx = [0]
+                for i in range(self.num_cars - 1):
+                    cities = np.row_stack((cities, self.center))
+                    center_idx.append(cities.shape[0] - 1)
-            # print(best_time)
-            # print(best_path)
+                ga = GA(num_drones=self.num_cars, num_city=cities.shape[0], num_total=20,
+                        data=cities, to_process_idx=center_idx, rectangles=self.rectangles)
-            reward += self.BASE_LINE - best_time
-            print(reward)
+                best_path, best_time = ga.run()
-            return state, reward, True, False, best_path
+                # print(best_time)
+                # print(best_path)
+                reward = self.BASE_LINE / best_time
+                return state, reward, True, False, best_path

     def render(self):
         if self.phase == 1:

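In the loop above, each region's offload ratio ρ is capped by a time budget and an energy budget, and the partition is rejected outright when the energy bound goes negative. The same arithmetic factored into a standalone helper, a sketch whose keys follow the params YAML fields loaded in `__init__`:

```python
def compute_rho(d, p):
    # offload-ratio bounds for a region of area d, mirroring PartitionEnv.step
    rho_time = (p['flight_time_factor'] - p['trans_time_factor']) / \
               (p['comp_time_factor'] - p['trans_time_factor'])
    rho_energy = (p['battery_energy_capacity']
                  - p['flight_energy_factor'] * d
                  - p['trans_energy_factor'] * d) / \
                 ((p['comp_energy_factor'] - p['trans_energy_factor']) * d)
    if rho_energy < 0:
        return None   # region too large: the battery budget cannot cover it
    return min(rho_time, rho_energy)
```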

@@ -11,7 +11,7 @@ print('state:', state)
 # action_series = [[0.67], [0], [0], [0], [0.7]]
 # action_series = [0, 0, 3, 0, 10]
-action_series = [[0.2], [0.4], [0.7], [0.5]]
 # action_series = [[0.5], [0.5]]
+action_series = [[-0.1], [0], [0], [0]]
 for i in range(100):
     action = action_series[i]
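For context on the reward change at the end of `step()`: the GA solves an mTSP in which the shared depot (`self.center`) is duplicated once per extra vehicle, and the episode reward is now the ratio `BASE_LINE / best_time` rather than the old difference `BASE_LINE - best_time`, so beating the baseline yields a reward above 1. A sketch of that depot bookkeeping with hypothetical coordinates (the `GA` signature is taken from the diff above, so its call stays commented here):

```python
import numpy as np

num_cars = 3
center = (0.0, 0.0)                                       # illustrative depot
rect_centers = [(10.0, 5.0), (20.0, 8.0), (15.0, 25.0)]   # hypothetical region centers

cities = np.array([center] + rect_centers)
center_idx = [0]
for _ in range(num_cars - 1):
    # one extra depot row per additional vehicle, as in PartitionEnv.step
    cities = np.vstack((cities, center))
    center_idx.append(cities.shape[0] - 1)

# ga = GA(num_drones=num_cars, num_city=cities.shape[0], num_total=20,
#         data=cities, to_process_idx=center_idx, rectangles=rectangles)
# best_path, best_time = ga.run()
# reward = BASE_LINE / best_time   # > 1 beats the baseline, < 1 falls short
```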