每一个加一个奖励
This commit is contained in:
parent
f347ca8276
commit
3e6887c655
1
GA/ga.py
1
GA/ga.py
@ -3,6 +3,7 @@ import random
|
||||
|
||||
# import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
# np.random.seed(42)
|
||||
|
||||
|
||||
class GA(object):
|
||||
|
130
env_partion.py
130
env_partion.py
@ -108,82 +108,88 @@ class PartitionEnv(gym.Env):
|
||||
# 出现无效调整,直接结束
|
||||
if not valid_adjust:
|
||||
return state, reward, True, False, {}
|
||||
# 调整合理,计算当前时间
|
||||
else:
|
||||
if self.partition_step < self.CUT_NUM:
|
||||
return state, 0.0, False, False, {}
|
||||
rectangles = self.if_valid_partition()
|
||||
|
||||
if not rectangles:
|
||||
reward = -10
|
||||
return state, reward, True, False, {}
|
||||
else:
|
||||
# 完成 4 步后,判断分区是否合理,并计算各个分区的任务卸载率ρ
|
||||
valid_partition = True
|
||||
for i in range(len(self.ori_row_cuts) - 1):
|
||||
for j in range(len(self.ori_col_cuts) - 1):
|
||||
d = (self.ori_col_cuts[j+1] - self.ori_col_cuts[j]) * self.W * \
|
||||
(self.ori_row_cuts[i+1] -
|
||||
self.ori_row_cuts[i]) * self.H
|
||||
rho_time_limit = (self.flight_time_factor - self.trans_time_factor) / \
|
||||
(self.comp_time_factor - self.trans_time_factor)
|
||||
rho_energy_limit = (self.battery_energy_capacity - self.flight_energy_factor * d - self.trans_energy_factor * d) / \
|
||||
(self.comp_energy_factor * d -
|
||||
self.trans_energy_factor * d)
|
||||
if rho_energy_limit < 0:
|
||||
valid_partition = False
|
||||
break
|
||||
rho = min(rho_time_limit, rho_energy_limit)
|
||||
# 继续进行路径规划
|
||||
# 使用遗传算法解多旅行商
|
||||
best_time, best_path = self.ga_solver(rectangles)
|
||||
# print(best_time)
|
||||
# print(best_path)
|
||||
|
||||
flight_time = self.flight_time_factor * d
|
||||
bs_time = self.bs_time_factor * (1 - rho) * d
|
||||
|
||||
self.rectangles.append({
|
||||
'center': ((self.ori_row_cuts[i] + self.ori_row_cuts[i+1]) * self.H / 2, (self.ori_col_cuts[j+1] + self.ori_col_cuts[j]) * self.W / 2),
|
||||
'flight_time': flight_time,
|
||||
'bs_time': bs_time,
|
||||
})
|
||||
if not valid_partition:
|
||||
break
|
||||
|
||||
if not valid_partition:
|
||||
reward = -10
|
||||
return state, reward, True, False, {}
|
||||
reward = self.BASE_LINE / best_time
|
||||
if self.partition_step < self.CUT_NUM:
|
||||
done = False
|
||||
else:
|
||||
# 继续进行路径规划
|
||||
# 使用q_learning解多旅行商
|
||||
# cities: [[x1, x2, x3...], [y1, y2, y3...]] 城市坐标
|
||||
# rec_center_lt = [rec_info['center']
|
||||
# for rec_info in self.rectangles]
|
||||
# cities = np.column_stack(rec_center_lt)
|
||||
# cities = np.column_stack((self.center, cities))
|
||||
done = True
|
||||
reward = reward * 3
|
||||
|
||||
# center_idx = []
|
||||
# for i in range(self.num_cars - 1):
|
||||
# cities = np.column_stack((cities, self.center))
|
||||
# center_idx.append(cities.shape[1] - 1)
|
||||
return state, reward, done, False, best_path
|
||||
|
||||
# tsp = mTSP(params=self.params, num_cities=cities.shape[1], cities=cities, num_cars=self.num_cars,
|
||||
# center_idx=center_idx, rectangles=self.rectangles)
|
||||
def if_valid_partition(self):
    """Build the per-cell rectangle records for the current cut layout.

    The grid is formed by consecutive pairs of ``self.ori_row_cuts`` and
    ``self.ori_col_cuts``.  For every cell the method computes the area
    term ``d``, the offloading ratio ``rho`` (the smaller of the time-based
    and the energy-based limit), and from those the flight and base-station
    times.

    Returns:
        list[dict]: one dict per cell, in row-major order, with the keys
        ``'center'`` (a ``(row, col)`` tuple scaled by ``H`` / ``W``),
        ``'flight_time'`` and ``'bs_time'``.  An empty list is returned as
        soon as any cell has a negative energy-based limit, i.e. the
        partition is not feasible.

    NOTE(review): a zero-area cell makes the energy-limit denominator zero;
    that case is not handled here, exactly as before -- confirm whether the
    caller guarantees strictly increasing cuts.
    """
    # The time-based limit does not depend on the cell, so compute it once.
    rho_time_limit = (self.flight_time_factor - self.trans_time_factor) / \
        (self.comp_time_factor - self.trans_time_factor)

    row_cuts = self.ori_row_cuts
    col_cuts = self.ori_col_cuts

    rectangles = []
    for i in range(len(row_cuts) - 1):
        row_lo, row_hi = row_cuts[i], row_cuts[i + 1]
        for j in range(len(col_cuts) - 1):
            col_lo, col_hi = col_cuts[j], col_cuts[j + 1]

            # Area term of the cell (same operand order as the original
            # expression so floating-point results are unchanged).
            d = (col_hi - col_lo) * self.W * (row_hi - row_lo) * self.H

            rho_energy_limit = (
                self.battery_energy_capacity
                - self.flight_energy_factor * d
                - self.trans_energy_factor * d
            ) / (self.comp_energy_factor * d - self.trans_energy_factor * d)

            # A negative energy limit means this cell cannot be served:
            # the whole partition is rejected.
            if rho_energy_limit < 0:
                return []

            rho = min(rho_time_limit, rho_energy_limit)

            rectangles.append({
                'center': ((row_lo + row_hi) * self.H / 2,
                           (col_hi + col_lo) * self.W / 2),
                'flight_time': self.flight_time_factor * d,
                'bs_time': self.bs_time_factor * (1 - rho) * d,
            })

    return rectangles
|
||||
|
||||
# def q_learning_solver(self):
|
||||
# 使用q_learning解多旅行商
|
||||
# cities: [[x1, x2, x3...], [y1, y2, y3...]] 城市坐标
|
||||
# rec_center_lt = [rec_info['center']
|
||||
# for rec_info in rectangles]
|
||||
# cities = np.column_stack(rec_center_lt)
|
||||
# cities = np.column_stack((self.center, cities))
|
||||
|
||||
center_idx = [0]
|
||||
for i in range(self.num_cars - 1):
|
||||
cities = np.row_stack((cities, self.center))
|
||||
center_idx.append(cities.shape[0] - 1)
|
||||
# center_idx = []
|
||||
# for i in range(self.num_cars - 1):
|
||||
# cities = np.column_stack((cities, self.center))
|
||||
# center_idx.append(cities.shape[1] - 1)
|
||||
|
||||
ga = GA(num_drones=self.num_cars, num_city=cities.shape[0], num_total=20,
|
||||
data=cities, to_process_idx=center_idx, rectangles=self.rectangles)
|
||||
# tsp = mTSP(params=self.params, num_cities=cities.shape[1], cities=cities, num_cars=self.num_cars,
|
||||
# center_idx=center_idx, rectangles=rectangles)
|
||||
|
||||
best_path, best_time = ga.run()
|
||||
# best_time, best_path = tsp.train(self.mTSP_STEPS)
|
||||
|
||||
# print(best_time)
|
||||
# print(best_path)
|
||||
def ga_solver(self, rectangles):
    """Run the genetic-algorithm solver over the given rectangles.

    The city table passed to ``GA`` is laid out as:

    * row 0: ``self.center``;
    * rows 1..R: the ``'center'`` of each entry in ``rectangles``;
    * rows R+1..: one further copy of ``self.center`` for every vehicle
      beyond the first (``self.num_cars - 1`` copies).

    The indices of all ``self.center`` rows are handed to ``GA`` as
    ``to_process_idx``.

    Args:
        rectangles: sequence of dicts that each carry a ``'center'`` entry
            (as produced by ``if_valid_partition``).

    Returns:
        tuple: ``(best_time, best_path)`` -- note the order is swapped
        relative to what ``GA.run()`` returns.
    """
    depot = self.center
    extra_depots = max(self.num_cars - 1, 0)

    # Assemble every row first and convert once, instead of growing the
    # array with repeated stacking calls.
    points = [depot]
    points.extend(rec['center'] for rec in rectangles)
    points.extend([depot] * extra_depots)
    cities = np.array(points)

    # Row 0 plus the trailing depot copies.
    first_extra = len(rectangles) + 1
    center_idx = [0] + list(range(first_extra, first_extra + extra_depots))

    ga = GA(num_drones=self.num_cars, num_city=cities.shape[0], num_total=20,
            data=cities, to_process_idx=center_idx, rectangles=rectangles)
    best_path, best_time = ga.run()
    return best_time, best_path
|
||||
|
||||
def render(self):
|
||||
if self.phase == 1:
|
||||
|
@ -10,7 +10,7 @@ print('state:', state)
|
||||
|
||||
# action_series = [[0.67], [0], [0], [0], [0.7]]
|
||||
# action_series = [0, 0, 3, 0, 10]
|
||||
action_series = [[0.2], [0.4], [0.7], [0.5]]
|
||||
# action_series = [[0.2], [0.4], [0.7], [0.5]]
|
||||
action_series = [[-0.1], [0], [0], [0]]
|
||||
|
||||
for i in range(100):
|
||||
|
Loading…
Reference in New Issue
Block a user