Fine-tune partitioning
parent f05f8400fb
commit f347ca8276
@@ -10,6 +10,7 @@ import sys
 import os
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from env_partion import PartitionEnv
 # from env import PartitionMazeEnv
 # fmt: on

 '''Hyperparameter Setting'''
@@ -18,7 +19,7 @@ parser.add_argument('--dvc', type=str, default='cpu',
                     help='running device: cuda or cpu')
 parser.add_argument('--EnvIdex', type=int, default=0,
                     help='PM_PPO_Con, PV1, Lch_Cv2, Humanv4, HCv4, BWv3, BWHv3')
-parser.add_argument('--write', type=str2bool, default=True,
+parser.add_argument('--write', type=str2bool, default=False,
                     help='Use SummaryWriter to record the training')
 parser.add_argument('--render', type=str2bool,
                     default=False, help='Render or Not')
@@ -28,7 +29,7 @@ parser.add_argument('--ModelIdex', type=int, default=500,
                     help='which model to load')

 parser.add_argument('--seed', type=int, default=0, help='random seed')
-parser.add_argument('--T_horizon', type=int, default=20,
+parser.add_argument('--T_horizon', type=int, default=15,
                     help='length of long trajectory')
 parser.add_argument('--Distribution', type=str, default='Beta',
                     help='Should be one of Beta ; GS_ms ; GS_m')
@@ -36,7 +37,7 @@ parser.add_argument('--Max_train_steps', type=int,
                     default=int(5e8), help='Max training steps')
 parser.add_argument('--save_interval', type=int,
                     default=int(5e5), help='Model saving interval, in steps.')
-parser.add_argument('--eval_interval', type=int, default=int(5e1),
+parser.add_argument('--eval_interval', type=int, default=int(5e3),
                     help='Model evaluating interval, in steps.')

 parser.add_argument('--gamma', type=float, default=0.99,
@@ -74,10 +75,10 @@ def main():
                'Humanv4', 'HCv4', 'BWv3', 'BWHv3']

     # Build Env
     # env = gym.make(EnvName[opt.EnvIdex], render_mode = "human" if opt.render else None)
     env = PartitionEnv()
     # env = PartitionMazeEnv()
     # eval_env = gym.make(EnvName[opt.EnvIdex])
     eval_env = PartitionEnv()
     # eval_env = PartitionMazeEnv()
     opt.state_dim = env.observation_space.shape[0]
     opt.action_dim = env.action_space.shape[0]
     opt.max_action = float(env.action_space.high[0])
@@ -129,9 +130,9 @@ def main():
             '''Interact with Env'''
             a, logprob_a = agent.select_action(
                 s, deterministic=False)  # use stochastic when training
-            # act = Action_adapter(a, opt.max_action)  # [0,1] to [-max,max]
+            act = Action_adapter(a, opt.max_action)  # [0,1] to [-max,max]
             s_next, r, dw, tr, info = env.step(
-                a)  # dw: dead&win; tr: truncated
+                act)  # dw: dead&win; tr: truncated
             # r = Reward_adapter(r, opt.EnvIdex)
             done = (dw or tr)
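Both this hunk and the matching one in `evaluate_policy` below route the policy sample through `Action_adapter` before `env.step`. The adapter itself is not part of this diff; in PPO-continuous codebases of this shape it is usually an affine map from a Beta sample in [0, 1] to the symmetric range [-max_action, max_action], so the body below is an assumption, not the repo's code:

```python
# Assumed behavior of Action_adapter (the helper is not shown in this commit):
# rescale a sample a in [0, 1] to [-max_action, max_action].
def Action_adapter(a, max_action):
    return 2.0 * (a - 0.5) * max_action
```

With `PartitionEnv` declaring `Box(low=0.0, high=1.0)` and hence `opt.max_action = 1.0`, such a map would yield adjustments in [-1, 1], which lines up with the negative adjustment (-0.1) exercised by the test script at the end of this commit.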
@@ -152,6 +153,7 @@ def main():
                 # evaluate the policy 3 times and average the result
                 score = evaluate_policy(
                     eval_env, agent, opt.max_action, turns=1)
+                # TODO: save the new path
                 if opt.write:
                     writer.add_scalar(
                         'ep_r', score, global_step=total_steps)
@@ -143,10 +143,10 @@ def evaluate_policy(env, agent, max_action, turns):
     while not done:
         # Take deterministic actions when evaluating
         a, logprob_a = agent.select_action(s, deterministic=True)
-        # act = Action_adapter(a, max_action)  # [0,1] to [-max,max]
-        s_next, r, dw, tr, info = env.step(a)
+        act = Action_adapter(a, max_action)  # [0,1] to [-max,max]
+        s_next, r, dw, tr, info = env.step(act)
         done = (dw or tr)
-        action_series.append(a[0])
+        action_series.append(act[0])
         total_scores += r
         s = s_next
     print('action series: ', np.round(action_series, 3))
env.py (7 changed lines)
@@ -39,9 +39,9 @@ class PartitionMazeEnv(gym.Env):
         ##############################
         # Hyperparameters that may need manual tuning
         ##############################
-        self.CUT_NUM = 4  # half horizontal cuts, half vertical cuts
-        self.BASE_LINE = 3500  # baseline time, computed via greedy or Monte Carlo
-        self.MAX_STEPS = 10  # step limit for walking the maze
+        self.CUT_NUM = 6  # half horizontal cuts, half vertical cuts
+        self.BASE_LINE = 10000  # baseline time, computed via greedy or Monte Carlo
+        self.MAX_STEPS = 20  # step limit for walking the maze

         self.phase = 0  # phase control: 0 = partitioning, 1 = maze init, 2 = maze walking
         self.partition_step = 0  # partition-phase step counter, range 0~4
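For scale: per the "half horizontal, half vertical" comment, raising CUT_NUM from 4 to 6 means three cuts in each direction. My arithmetic on what that implies for the maze grid (an inference, not stated in the diff):

```python
# CUT_NUM = 6, split evenly between directions (per the comment above):
CUT_NUM = 6
row_bands = CUT_NUM // 2 + 1   # 3 horizontal cuts -> up to 4 row bands
col_bands = CUT_NUM // 2 + 1   # 3 vertical cuts  -> up to 4 column bands
assert row_bands * col_bands == 16  # up to 16 cells, vs 9 when CUT_NUM was 4
```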
@@ -172,6 +172,7 @@ class PartitionMazeEnv(gym.Env):
             ])
             return state, reward, True, False, {}
         else:
+            print(self.partition_values)
             # enter phase 1: initialize the maze
             self.phase = 1
             reward = 0.2
env_partion.py (217 changed lines)
@@ -19,12 +19,28 @@ class PartitionEnv(gym.Env):
         # Hyperparameters that may need manual tuning
         ##############################
         self.params = 'params2'
+        self.ORI_ROW_CUTS = [0, 0.2, 0.4, 0.7, 1]
+        self.ORI_COL_CUTS = [0, 0.5, 1]
         self.CUT_NUM = 4
+        self.ROW_CUT_LIMIT = 3
+        self.COL_CUT_LIMIT = 1
         self.BASE_LINE = 10000
+        self.mTSP_STEPS = 10000
+
+        # Define the action space: every action is a 1-D continuous value in [0, 1]
+        self.action_space = spaces.Box(
+            low=0.0, high=1.0, shape=(1,), dtype=np.float32)
+
+        # Define the observation space as an 8-dimensional vector
+        # (the first 4 dims hold the decided cut values; undecided entries are 0)
+        self.observation_space = spaces.Box(
+            low=0.0, high=1.0, shape=(self.CUT_NUM + 4,), dtype=np.float32)
+
+        self.partition_step = 0
+        self.ori_row_cuts = self.ORI_ROW_CUTS[:]
+        self.ori_col_cuts = self.ORI_COL_CUTS[:]
+        self.rectangles = []

         # Fleet parameter settings
         with open(self.params + '.yml', 'r', encoding='utf-8') as file:
             params = yaml.safe_load(file)
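One non-obvious detail in this hunk is the observation shape `CUT_NUM + 4`. My reading, inferred from the constants above rather than stated anywhere in the commit: the state is now the concatenation of the full row- and column-cut lists, endpoints included:

```python
# Sanity check of the 8-dim observation under the defaults above.
ORI_ROW_CUTS = [0, 0.2, 0.4, 0.7, 1]  # 5 values, 3 adjustable interior cuts
ORI_COL_CUTS = [0, 0.5, 1]            # 3 values, 1 adjustable interior cut
CUT_NUM = 4                           # ROW_CUT_LIMIT + COL_CUT_LIMIT adjustable cuts
state_dim = len(ORI_ROW_CUTS + ORI_COL_CUTS)
assert state_dim == CUT_NUM + 4 == 8
```

Note that the carried-over comment about "the first 4 dims hold the decided cut values" still describes the old `partition_values` state rather than this layout.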
@@ -45,140 +61,129 @@ class PartitionEnv(gym.Env):
         self.trans_energy_factor = params['trans_energy_factor']
         self.battery_energy_capacity = params['battery_energy_capacity']

-        self.partition_step = 0  # partition-phase step counter, range 0~4
-        self.partition_values = np.zeros(
-            self.CUT_NUM, dtype=np.float32)  # stores c1, c2, r1, r2
-
-        # Define the action space: every action is a 1-D continuous value in [0, 1]
-        self.action_space = spaces.Box(
-            low=0.0, high=1.0, shape=(1,), dtype=np.float32)
-
-        # Define the observation space as an 8-dimensional vector
-        # (the first 4 dims hold the decided cut values; undecided entries are 0)
-        self.observation_space = spaces.Box(
-            low=0.0, high=1.0, shape=(self.CUT_NUM,), dtype=np.float32)
-
-        # Partition-phase variables
-        self.col_cuts = []  # vertical cut positions (c1, c2); 0 means no cut there
-        self.row_cuts = []  # horizontal cut positions (r1, r2)
-        self.rectangles = []

     def reset(self, seed=None, options=None):
         # Reset all variables and return to the partition phase (phase 0)
         self.phase = 0
         self.partition_step = 0
-        self.partition_values = np.zeros(self.CUT_NUM, dtype=np.float32)
-        self.col_cuts = []
-        self.row_cuts = []
+        self.ori_row_cuts = self.ORI_ROW_CUTS[:]
+        self.ori_col_cuts = self.ORI_COL_CUTS[:]
         self.rectangles = []

-        # State: first 4 dims are partition_values, the rest are region-visit flags (all 0 initially)
-        state = self.partition_values
+        state = np.array(self.ori_row_cuts + self.ori_col_cuts)

         return state
     def step(self, action):
-        # The action is 1-D continuous in every phase; take action[0]
-        a = float(action[0])
-        self.partition_values[self.partition_step] = a
+        # The action is 1-D continuous; action[0] is the adjustment to one cut
+        adjust = float(action[0])
+        valid_adjust = True
+
+        if self.partition_step < self.ROW_CUT_LIMIT:
+            row_cut = self.ori_row_cuts[self.partition_step + 1]
+            new_row_cut = row_cut + adjust
+            self.ori_row_cuts[self.partition_step + 1] = new_row_cut
+
+            if self.ori_row_cuts[self.partition_step] < new_row_cut < self.ori_row_cuts[self.partition_step + 2]:
+                pass
+            else:
+                valid_adjust = False
+                reward = -100
+        else:
+            col_idx = self.partition_step - self.ROW_CUT_LIMIT
+            col_cut = self.ori_col_cuts[col_idx + 1]
+            new_col_cut = col_cut + adjust
+            self.ori_col_cuts[col_idx + 1] = new_col_cut
+
+            if self.ori_col_cuts[col_idx] < new_col_cut < self.ori_col_cuts[col_idx + 2]:
+                pass
+            else:
+                valid_adjust = False
+                reward = -100

         self.partition_step += 1

-        # Build the current state: the first partition_step entries are decided values, the rest 0
-        state = self.partition_values
+        state = np.array(self.ori_row_cuts + self.ori_col_cuts)

         # With fewer than 4 steps done we are still partitioning: no reward, done is False
         if self.partition_step < self.CUT_NUM:
             return state, 0.0, False, False, {}
+        # An invalid adjustment ends the episode immediately
+        if not valid_adjust:
+            return state, reward, True, False, {}
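The core of the new step() is the adjustment invariant: each action nudges exactly one interior cut, and the nudge is only legal if the cut stays strictly between its two neighbours. A standalone sketch of that rule (hypothetical helper and values, not code from this repo; unlike the env, it leaves the list untouched on an invalid nudge):

```python
# Hypothetical, self-contained illustration of the validity rule above.
def adjust_cut(cuts, idx, delta):
    """Nudge the interior cut at list index idx by delta.

    Returns (cuts, ok); ok is False when the adjusted cut leaves the open
    interval between its neighbours, which in the env ends the episode
    with reward -100.
    """
    new_val = cuts[idx] + delta
    if not (cuts[idx - 1] < new_val < cuts[idx + 1]):
        return cuts, False
    cuts[idx] = new_val
    return cuts, True

cuts, ok = adjust_cut([0, 0.2, 0.4, 0.7, 1], 1, 0.05)  # 0.2 -> 0.25, still < 0.4
assert ok and abs(cuts[1] - 0.25) < 1e-9
_, ok = adjust_cut([0, 0.2, 0.4, 0.7, 1], 1, 0.3)      # 0.2 -> 0.5, crosses 0.4
assert not ok
```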
         else:
-            # After the 4 steps, compute the cut boundaries
-            # Filter out zeros, deduplicate, and sort
-            rows = sorted(
-                set(v for v in self.partition_values[:self.ROW_CUT_LIMIT] if v > 0))
-            cols = sorted(
-                set(v for v in self.partition_values[self.ROW_CUT_LIMIT:] if v > 0))
-            rows = rows if rows else []
-            cols = cols if cols else []
-
-            # Boundaries always include 0 and 1
-            self.row_cuts = [0.0] + rows + [1.0]
-            self.col_cuts = [0.0] + cols + [1.0]
-
-            # Check whether the partition is feasible and compute each region's task offload ratio rho
-            valid_partition = True
-            for i in range(len(self.row_cuts) - 1):
-                for j in range(len(self.col_cuts) - 1):
-                    d = (self.col_cuts[j+1] - self.col_cuts[j]) * self.W * \
-                        (self.row_cuts[i+1] - self.row_cuts[i]) * self.H
-                    rho_time_limit = (self.flight_time_factor - self.trans_time_factor) / \
-                        (self.comp_time_factor - self.trans_time_factor)
-                    rho_energy_limit = (self.battery_energy_capacity - self.flight_energy_factor * d - self.trans_energy_factor * d) / \
-                        (self.comp_energy_factor * d -
-                         self.trans_energy_factor * d)
-                    if rho_energy_limit < 0:
-                        valid_partition = False
-                        break
-                    rho = min(rho_time_limit, rho_energy_limit)
-
-                    flight_time = self.flight_time_factor * d
-                    bs_time = self.bs_time_factor * (1 - rho) * d
-
-                    self.rectangles.append({
-                        'center': ((self.row_cuts[i] + self.row_cuts[i+1]) * self.H / 2, (self.col_cuts[j+1] + self.col_cuts[j]) * self.W / 2),
-                        'flight_time': flight_time,
-                        'bs_time': bs_time,
-                    })
-                if not valid_partition:
-                    break
-
-            if not valid_partition:
-                reward = -100
-                state = self.partition_values
-                return state, reward, True, False, {}
+            reward = 0
+            # After the 4 steps, check whether the partition is feasible and compute each region's task offload ratio rho
+            valid_partition = True
+            for i in range(len(self.ori_row_cuts) - 1):
+                for j in range(len(self.ori_col_cuts) - 1):
+                    d = (self.ori_col_cuts[j+1] - self.ori_col_cuts[j]) * self.W * \
+                        (self.ori_row_cuts[i+1] -
+                         self.ori_row_cuts[i]) * self.H
+                    rho_time_limit = (self.flight_time_factor - self.trans_time_factor) / \
+                        (self.comp_time_factor - self.trans_time_factor)
+                    rho_energy_limit = (self.battery_energy_capacity - self.flight_energy_factor * d - self.trans_energy_factor * d) / \
+                        (self.comp_energy_factor * d -
+                         self.trans_energy_factor * d)
+                    if rho_energy_limit < 0:
+                        valid_partition = False
+                        break
+                    rho = min(rho_time_limit, rho_energy_limit)
+
+                    flight_time = self.flight_time_factor * d
+                    bs_time = self.bs_time_factor * (1 - rho) * d
+
+                    self.rectangles.append({
+                        'center': ((self.ori_row_cuts[i] + self.ori_row_cuts[i+1]) * self.H / 2, (self.ori_col_cuts[j+1] + self.ori_col_cuts[j]) * self.W / 2),
+                        'flight_time': flight_time,
+                        'bs_time': bs_time,
+                    })
+                if not valid_partition:
+                    break
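The two rho limits read like closed-form solutions of per-region time and energy budgets. The underlying task model is not documented in this diff, so treat the physical reading below as an assumption inferred from the formulas themselves:

```python
# Inferred derivation of the offload-ratio bounds used in the loop above.
# For a region of area d, with rho the fraction of work computed on board:
#   time budget:   comp_t*rho*d + trans_t*(1-rho)*d <= flight_t*d
#                  => rho <= (flight_t - trans_t) / (comp_t - trans_t)
#   energy budget: flight_e*d + comp_e*rho*d + trans_e*(1-rho)*d <= capacity
#                  => rho <= (capacity - (flight_e + trans_e)*d)
#                            / ((comp_e - trans_e)*d)
def rho_limits(d, flight_t, trans_t, comp_t, flight_e, trans_e, comp_e, capacity):
    rho_time = (flight_t - trans_t) / (comp_t - trans_t)
    rho_energy = (capacity - flight_e * d - trans_e * d) / ((comp_e - trans_e) * d)
    return min(rho_time, rho_energy)
```

A negative rho_energy_limit then means even full offloading cannot fit the energy budget, which is exactly the case the loop flags with valid_partition = False.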
             if not valid_partition:
+                reward = -10
                 return state, reward, True, False, {}
             else:
                 # Continue with path planning
                 # Solve the multi-traveling-salesman problem with q_learning
                 # cities: [[x1, x2, x3...], [y1, y2, y3...]] city coordinates
                 # rec_center_lt = [rec_info['center']
                 #                  for rec_info in self.rectangles]
                 # cities = np.column_stack(rec_center_lt)
                 # cities = np.column_stack((self.center, cities))

                 # center_idx = []
                 # for i in range(self.num_cars - 1):
                 #     cities = np.column_stack((cities, self.center))
                 #     center_idx.append(cities.shape[1] - 1)

                 # tsp = mTSP(params=self.params, num_cities=cities.shape[1], cities=cities, num_cars=self.num_cars,
                 #            center_idx=center_idx, rectangles=self.rectangles)
                 # best_time, best_path = tsp.train(self.mTSP_STEPS)

                 # Solve the multi-traveling-salesman problem with a genetic algorithm
                 cities = [self.center]
                 for rec in self.rectangles:
                     cities.append(rec['center'])
                 cities = np.array(cities)

                 center_idx = [0]
                 for i in range(self.num_cars - 1):
                     cities = np.row_stack((cities, self.center))
                     center_idx.append(cities.shape[0] - 1)

                 ga = GA(num_drones=self.num_cars, num_city=cities.shape[0], num_total=20,
                         data=cities, to_process_idx=center_idx, rectangles=self.rectangles)
                 best_path, best_time = ga.run()

                 # print(best_time)
                 # print(best_path)

-                reward += self.BASE_LINE - best_time
-                print(reward)
+                reward = self.BASE_LINE / best_time

                 return state, reward, True, False, best_path
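The reward change at the end of this branch is worth a second look: the old additive form could swing by thousands and drown out the -10/-100 penalties, while the new ratio form stays near 1. A made-up numeric comparison (BASE_LINE = 10000 as set above; the plan times are invented for illustration):

```python
# Made-up plan times, purely to show the scale difference of the two forms.
BASE_LINE = 10000
for best_time in (8000, 10000, 12500):
    additive = BASE_LINE - best_time  # old form: -2500 .. 2000 here
    ratio = BASE_LINE / best_time     # new form: 0.8 .. 1.25 here
    print(best_time, additive, round(ratio, 3))
```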
     def render(self):
         if self.phase == 1:
@@ -11,7 +11,7 @@ print('state:', state)
 # action_series = [[0.67], [0], [0], [0], [0.7]]
 # action_series = [0, 0, 3, 0, 10]
-action_series = [[0.2], [0.4], [0.7], [0.5]]
 # action_series = [[0.5], [0.5]]
+action_series = [[-0.1], [0], [0], [0]]

 for i in range(100):
     action = action_series[i]
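Hand-tracing the new action series through PartitionEnv.step (my arithmetic, based on the ORI_* defaults in env_partion.py above; the test drives env.step directly, so the negative value bypasses the declared action space) shows why it is a useful smoke test:

```python
# Trace (hand-computed, assuming ORI_ROW_CUTS = [0, 0.2, 0.4, 0.7, 1]
# and ORI_COL_CUTS = [0, 0.5, 1]):
#   step 1: adjust = -0.1 -> row cut 0.2 becomes 0.1; valid, since 0 < 0.1 < 0.4
#   steps 2-3: adjust = 0 -> row cuts 0.4 and 0.7 unchanged, still ordered
#   step 4: adjust = 0    -> col cut 0.5 unchanged; partition complete
# With all adjustments valid, step 4 falls through to the GA planning branch
# and the episode ends with reward = BASE_LINE / best_time.
```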