Fine-tune partitioning
commit f347ca8276
parent f05f8400fb
@@ -10,6 +10,7 @@ import sys
 import os
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from env_partion import PartitionEnv
+# from env import PartitionMazeEnv
 # fmt: on

 '''Hyperparameter Setting'''
@@ -18,7 +19,7 @@ parser.add_argument('--dvc', type=str, default='cpu',
                     help='running device: cuda or cpu')
 parser.add_argument('--EnvIdex', type=int, default=0,
                     help='PM_PPO_Con, PV1, Lch_Cv2, Humanv4, HCv4, BWv3, BWHv3')
-parser.add_argument('--write', type=str2bool, default=True,
+parser.add_argument('--write', type=str2bool, default=False,
                     help='Use SummaryWriter to record the training')
 parser.add_argument('--render', type=str2bool,
                     default=False, help='Render or Not')
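The boolean flags above are parsed with type=str2bool, which is not a built-in argparse type. A minimal sketch of such a helper, assuming the repo defines it in its utilities (the actual implementation may differ):

    import argparse

    def str2bool(v):
        # Accept common string spellings of a boolean for argparse flags.
        if isinstance(v, bool):
            return v
        if v.lower() in ('yes', 'true', 't', 'y', '1'):
            return True
        if v.lower() in ('no', 'false', 'f', 'n', '0'):
            return False
        raise argparse.ArgumentTypeError('boolean value expected')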
@@ -28,7 +29,7 @@ parser.add_argument('--ModelIdex', type=int, default=500,
                     help='which model to load')

 parser.add_argument('--seed', type=int, default=0, help='random seed')
-parser.add_argument('--T_horizon', type=int, default=20,
+parser.add_argument('--T_horizon', type=int, default=15,
                     help='lenth of long trajectory')
 parser.add_argument('--Distribution', type=str, default='Beta',
                     help='Should be one of Beta ; GS_ms ; GS_m')
@@ -36,7 +37,7 @@ parser.add_argument('--Max_train_steps', type=int,
                     default=int(5e8), help='Max training steps')
 parser.add_argument('--save_interval', type=int,
                     default=int(5e5), help='Model saving interval, in steps.')
-parser.add_argument('--eval_interval', type=int, default=int(5e1),
+parser.add_argument('--eval_interval', type=int, default=int(5e3),
                     help='Model evaluating interval, in steps.')

 parser.add_argument('--gamma', type=float, default=0.99,
@@ -74,10 +75,10 @@ def main():
                'Humanv4', 'HCv4', 'BWv3', 'BWHv3']

     # Build Env
-    # env = gym.make(EnvName[opt.EnvIdex], render_mode = "human" if opt.render else None)
     env = PartitionEnv()
-    # eval_env = gym.make(EnvName[opt.EnvIdex])
+    # env = PartitionMazeEnv()
     eval_env = PartitionEnv()
+    # eval_env = PartitionMazeEnv()
     opt.state_dim = env.observation_space.shape[0]
     opt.action_dim = env.action_space.shape[0]
     opt.max_action = float(env.action_space.high[0])
@@ -129,9 +130,9 @@ def main():
             '''Interact with Env'''
             a, logprob_a = agent.select_action(
                 s, deterministic=False)  # use stochastic when training
-            # act = Action_adapter(a,opt.max_action) #[0,1] to [-max,max]
+            act = Action_adapter(a,opt.max_action) #[0,1] to [-max,max]
             s_next, r, dw, tr, info = env.step(
-                a)  # dw: dead&win; tr: truncated
+                act)  # dw: dead&win; tr: truncated
             # r = Reward_adapter(r, opt.EnvIdex)
             done = (dw or tr)

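This hunk re-enables Action_adapter, so the raw policy sample a in [0, 1] (the Beta distribution's support) is rescaled before env.step. A minimal sketch of such an adapter, assuming the usual linear mapping implied by the "[0,1] to [-max,max]" comment; the repo's own version may differ:

    def Action_adapter(a, max_action):
        # Linearly map an action from [0, 1] to [-max_action, max_action].
        return 2.0 * (a - 0.5) * max_action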
@@ -152,6 +153,7 @@ def main():
                 # evaluate the policy for 3 times, and get averaged result
                 score = evaluate_policy(
                     eval_env, agent, opt.max_action, turns=1)
+                # TODO: save the new path
                 if opt.write:
                     writer.add_scalar(
                         'ep_r', score, global_step=total_steps)
@@ -143,10 +143,10 @@ def evaluate_policy(env, agent, max_action, turns):
         while not done:
             # Take deterministic actions when evaluation
             a, logprob_a = agent.select_action(s, deterministic=True)
-            # act = Action_adapter(a, max_action)  # [0,1] to [-max,max]
-            s_next, r, dw, tr, info = env.step(a)
+            act = Action_adapter(a, max_action)  # [0,1] to [-max,max]
+            s_next, r, dw, tr, info = env.step(act)
             done = (dw or tr)
-            action_series.append(a[0])
+            action_series.append(act[0])
             total_scores += r
             s = s_next
         print('action series: ', np.round(action_series, 3))
env.py (7 changed lines)
@@ -39,9 +39,9 @@ class PartitionMazeEnv(gym.Env):
         ##############################
         # Hyperparameters that may need manual tuning
         ##############################
-        self.CUT_NUM = 4  # half horizontal cuts, half vertical cuts
-        self.BASE_LINE = 3500  # baseline time, computed via greedy or Monte Carlo
-        self.MAX_STEPS = 10  # upper limit on maze-walking steps
+        self.CUT_NUM = 6  # half horizontal cuts, half vertical cuts
+        self.BASE_LINE = 10000  # baseline time, computed via greedy or Monte Carlo
+        self.MAX_STEPS = 20  # upper limit on maze-walking steps

         self.phase = 0  # phase control: 0 = partitioning, 1 = maze initialization, 2 = maze walking
         self.partition_step = 0  # partition-phase step counter, range 0-4

@@ -172,6 +172,7 @@ class PartitionMazeEnv(gym.Env):
             ])
             return state, reward, True, False, {}
         else:
+            print(self.partition_values)
             # Enter phase 1: initialize the maze
             self.phase = 1
             reward = 0.2
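BASE_LINE is described as a baseline time obtained by greedy search or Monte Carlo. A hedged sketch of the Monte Carlo variant: roll out random action sequences and keep the best completion time. The info key 'best_time' is an assumption for illustration; the real environment may expose the measured time differently:

    import numpy as np

    def monte_carlo_baseline(env, n_rollouts=100, seed=0):
        # Sample random action sequences; track the smallest completion time seen.
        rng = np.random.default_rng(seed)
        best = float('inf')
        for _ in range(n_rollouts):
            env.reset()
            done, info = False, {}
            while not done:
                action = rng.uniform(0.0, 1.0, size=env.action_space.shape)
                _, _, dw, tr, info = env.step(action)
                done = dw or tr
            best = min(best, info.get('best_time', best))  # hypothetical key
        return best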
env_partion.py (111 changed lines)
@@ -19,12 +19,28 @@ class PartitionEnv(gym.Env):
         # Hyperparameters that may need manual tuning
         ##############################
         self.params = 'params2'
+        self.ORI_ROW_CUTS = [0, 0.2, 0.4, 0.7, 1]
+        self.ORI_COL_CUTS = [0, 0.5, 1]
         self.CUT_NUM = 4
         self.ROW_CUT_LIMIT = 3
         self.COL_CUT_LIMIT = 1
         self.BASE_LINE = 10000
         self.mTSP_STEPS = 10000

+        # Define the action space: every action is 1-D continuous in [0, 1]
+        self.action_space = spaces.Box(
+            low=0.0, high=1.0, shape=(1,), dtype=np.float32)
+
+        # Define the observation space as an 8-D vector
+        # The first 4 dims hold the decided cut values (undecided entries are 0)
+        self.observation_space = spaces.Box(
+            low=0.0, high=1.0, shape=(self.CUT_NUM + 4,), dtype=np.float32)
+
+        self.partition_step = 0
+        self.ori_row_cuts = self.ORI_ROW_CUTS[:]
+        self.ori_col_cuts = self.ORI_COL_CUTS[:]
+        self.rectangles = []
+
         # Fleet parameter settings
         with open(self.params + '.yml', 'r', encoding='utf-8') as file:
             params = yaml.safe_load(file)
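Worth noting: the observation is now the full cut vector rather than the partially filled partition_values, and the dimensions line up, since len(ORI_ROW_CUTS) + len(ORI_COL_CUTS) = 5 + 3 = 8 = CUT_NUM + 4. A quick standalone check (a sketch, not repo code):

    import numpy as np

    ORI_ROW_CUTS = [0, 0.2, 0.4, 0.7, 1]  # 5 entries
    ORI_COL_CUTS = [0, 0.5, 1]            # 3 entries
    CUT_NUM = 4

    state = np.array(ORI_ROW_CUTS + ORI_COL_CUTS, dtype=np.float32)
    assert state.shape == (CUT_NUM + 4,)  # 8-D, matching spaces.Box(shape=(CUT_NUM + 4,))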
@@ -45,70 +61,64 @@ class PartitionEnv(gym.Env):
         self.trans_energy_factor = params['trans_energy_factor']
         self.battery_energy_capacity = params['battery_energy_capacity']

-        self.partition_step = 0  # partition-phase step counter, range 0-4
-        self.partition_values = np.zeros(
-            self.CUT_NUM, dtype=np.float32)  # stores c₁, c₂, r₁, r₂
-
-        # Define the action space: every action is 1-D continuous in [0, 1]
-        self.action_space = spaces.Box(
-            low=0.0, high=1.0, shape=(1,), dtype=np.float32)
-
-        # Define the observation space as an 8-D vector
-        # The first 4 dims hold the decided cut values (undecided entries are 0)
-        self.observation_space = spaces.Box(
-            low=0.0, high=1.0, shape=(self.CUT_NUM,), dtype=np.float32)
-
-        # Variables for the cutting phase
-        self.col_cuts = []  # vertical cut positions (c₁, c₂); 0 means no cut
-        self.row_cuts = []  # horizontal cut positions (r₁, r₂)
-        self.rectangles = []

     def reset(self, seed=None, options=None):
         # Reset all variables and return to the cutting phase (phase 0)
         self.phase = 0
         self.partition_step = 0
-        self.partition_values = np.zeros(self.CUT_NUM, dtype=np.float32)
-        self.col_cuts = []
-        self.row_cuts = []
+        self.ori_row_cuts = self.ORI_ROW_CUTS[:]
+        self.ori_col_cuts = self.ORI_COL_CUTS[:]
         self.rectangles = []

         # State: the first 4 dims are partition values, the rest are region-visit flags (initially all 0)
-        state = self.partition_values
+        state = np.array(self.ori_row_cuts + self.ori_col_cuts)

         return state

     def step(self, action):
         # In every phase the action is 1-D continuous; take action[0]
-        a = float(action[0])
-        self.partition_values[self.partition_step] = a
+        adjust = float(action[0])
+        valid_adjust = True
+
+        if self.partition_step < self.ROW_CUT_LIMIT:
+            row_cut = self.ori_row_cuts[self.partition_step + 1]
+            new_row_cut = row_cut + adjust
+            self.ori_row_cuts[self.partition_step + 1] = new_row_cut
+
+            if self.ori_row_cuts[self.partition_step] < new_row_cut < self.ori_row_cuts[self.partition_step + 2]:
+                pass
+            else:
+                valid_adjust = False
+                reward = -100
+        else:
+            col_idx = self.partition_step - self.ROW_CUT_LIMIT
+            col_cut = self.ori_col_cuts[col_idx + 1]
+            new_col_cut = col_cut + adjust
+            self.ori_col_cuts[col_idx + 1] = new_col_cut
+
+            if self.ori_col_cuts[col_idx] < new_col_cut < self.ori_col_cuts[col_idx + 2]:
+                pass
+            else:
+                valid_adjust = False
+                reward = -100
+
         self.partition_step += 1

-        # Build the current state: the first partition_step entries are decided values, the rest 0, then pad with 7 zeros
-        state = self.partition_values
+        state = np.array(self.ori_row_cuts + self.ori_col_cuts)

-        # With fewer than 4 steps done we are still in the cutting phase: no reward, done is False
+        # An invalid adjustment ends the episode immediately
+        if not valid_adjust:
+            return state, reward, True, False, {}
+        else:
         if self.partition_step < self.CUT_NUM:
             return state, 0.0, False, False, {}
         else:
-            # After 4 steps, compute the cut boundaries
-            # Filter out zeros, deduplicate, then sort
-            rows = sorted(
-                set(v for v in self.partition_values[:self.ROW_CUT_LIMIT] if v > 0))
-            cols = sorted(
-                set(v for v in self.partition_values[self.ROW_CUT_LIMIT:] if v > 0))
-            rows = rows if rows else []
-            cols = cols if cols else []
-
-            # Boundaries always include 0 and 1
-            self.row_cuts = [0.0] + rows + [1.0]
-            self.col_cuts = [0.0] + cols + [1.0]
-
-            # Check whether the partition is valid and compute each region's task offloading ratio ρ
+            # After 4 steps, check whether the partition is valid and compute each region's task offloading ratio ρ
             valid_partition = True
-            for i in range(len(self.row_cuts) - 1):
-                for j in range(len(self.col_cuts) - 1):
-                    d = (self.col_cuts[j+1] - self.col_cuts[j]) * self.W * \
-                        (self.row_cuts[i+1] - self.row_cuts[i]) * self.H
+            for i in range(len(self.ori_row_cuts) - 1):
+                for j in range(len(self.ori_col_cuts) - 1):
+                    d = (self.ori_col_cuts[j+1] - self.ori_col_cuts[j]) * self.W * \
+                        (self.ori_row_cuts[i+1] -
+                         self.ori_row_cuts[i]) * self.H
                     rho_time_limit = (self.flight_time_factor - self.trans_time_factor) / \
                         (self.comp_time_factor - self.trans_time_factor)
                     rho_energy_limit = (self.battery_energy_capacity - self.flight_energy_factor * d - self.trans_energy_factor * d) / \
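The new step() logic treats the action as a delta on one interior cut and rejects any adjustment that breaks the strict ordering of the cut positions. The same check in standalone form (a sketch mirroring the diff, not the repo's code):

    def apply_adjust(cuts, idx, adjust):
        # Shift interior cut `idx` and verify it stays strictly between its neighbours.
        cuts[idx] += adjust
        return cuts[idx - 1] < cuts[idx] < cuts[idx + 1]

    cuts = [0, 0.2, 0.4, 0.7, 1]
    print(apply_adjust(cuts, 1, 0.05))  # True: 0 < 0.25 < 0.4
    print(apply_adjust(cuts, 2, 0.5))   # False: 0.9 is not below 0.7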
@@ -123,7 +133,7 @@ class PartitionEnv(gym.Env):
                     bs_time = self.bs_time_factor * (1 - rho) * d

                     self.rectangles.append({
-                        'center': ((self.row_cuts[i] + self.row_cuts[i+1]) * self.H / 2, (self.col_cuts[j+1] + self.col_cuts[j]) * self.W / 2),
+                        'center': ((self.ori_row_cuts[i] + self.ori_row_cuts[i+1]) * self.H / 2, (self.ori_col_cuts[j+1] + self.ori_col_cuts[j]) * self.W / 2),
                         'flight_time': flight_time,
                         'bs_time': bs_time,
                     })
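Each rectangle's center is the midpoint of its cell, scaled from normalized cut positions to physical H-by-W coordinates. The same expression in standalone form (an illustrative sketch):

    def cell_center(row_cuts, col_cuts, i, j, H, W):
        # Midpoint of cell (i, j): normalized cut midpoints scaled by the field size.
        y = (row_cuts[i] + row_cuts[i + 1]) * H / 2
        x = (col_cuts[j] + col_cuts[j + 1]) * W / 2
        return (y, x)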
@@ -131,13 +141,9 @@ class PartitionEnv(gym.Env):
                     break

         if not valid_partition:
-            reward = -100
-            state = self.partition_values
+            reward = -10
             return state, reward, True, False, {}
         else:
-            reward = 0
-            state = self.partition_values
-
             # Continue with path planning
             # Solve the multiple traveling salesman problem with Q-learning
             # cities: [[x1, x2, x3...], [y1, y2, y3...]] city coordinates
@@ -175,8 +181,7 @@ class PartitionEnv(gym.Env):
             # print(best_time)
             # print(best_path)

-            reward += self.BASE_LINE - best_time
-            print(reward)
+            reward = self.BASE_LINE / best_time

             return state, reward, True, False, best_path

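The reward switches from an additive margin to a ratio against the baseline: BASE_LINE / best_time is scale-free and stays positive, exceeding 1 exactly when the plan beats the baseline. A quick comparison of the two shapings (illustrative values only):

    BASE_LINE = 10000.0

    def reward_margin(best_time):   # old shaping: BASE_LINE - best_time
        return BASE_LINE - best_time

    def reward_ratio(best_time):    # new shaping: BASE_LINE / best_time
        return BASE_LINE / best_time

    print(reward_margin(8000), reward_ratio(8000))    # 2000.0 1.25
    print(reward_margin(12500), reward_ratio(12500))  # -2500.0 0.8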
@@ -11,7 +11,7 @@ print('state:', state)
 # action_series = [[0.67], [0], [0], [0], [0.7]]
 # action_series = [0, 0, 3, 0, 10]
 action_series = [[0.2], [0.4], [0.7], [0.5]]
-# action_series = [[0.5], [0.5]]
+action_series = [[-0.1], [0], [0], [0]]

 for i in range(100):
     action = action_series[i]