finished BO
Manual case and BO fully functional
This commit is contained in:
parent
33764e1ac3
commit
1416e72675
@ -5,5 +5,6 @@ uint16 nr_runs
|
|||||||
string acquisition_function
|
string acquisition_function
|
||||||
---
|
---
|
||||||
float32[] best_policy
|
float32[] best_policy
|
||||||
|
float32[] best_weights
|
||||||
float32[] reward_mean
|
float32[] reward_mean
|
||||||
float32[] reward_std
|
float32[] reward_std
|
@ -7,6 +7,7 @@ from active_bo_ros.AcquisitionFunctions.ExpectedImprovement import ExpectedImpro
|
|||||||
from active_bo_ros.AcquisitionFunctions.ProbabilityOfImprovement import ProbabilityOfImprovement
|
from active_bo_ros.AcquisitionFunctions.ProbabilityOfImprovement import ProbabilityOfImprovement
|
||||||
from active_bo_ros.AcquisitionFunctions.ConfidenceBound import ConfidenceBound
|
from active_bo_ros.AcquisitionFunctions.ConfidenceBound import ConfidenceBound
|
||||||
|
|
||||||
|
|
||||||
class BayesianOptimization:
|
class BayesianOptimization:
|
||||||
def __init__(self, env, nr_steps, nr_init=3, acq='ei', nr_weights=6, policy_seed=None):
|
def __init__(self, env, nr_steps, nr_init=3, acq='ei', nr_weights=6, policy_seed=None):
|
||||||
self.env = env
|
self.env = env
|
||||||
@ -43,7 +44,6 @@ class BayesianOptimization:
|
|||||||
self.best_reward = np.empty((1, 1))
|
self.best_reward = np.empty((1, 1))
|
||||||
|
|
||||||
def runner(self, policy):
|
def runner(self, policy):
|
||||||
done = False
|
|
||||||
env_reward = 0.0
|
env_reward = 0.0
|
||||||
step_count = 0
|
step_count = 0
|
||||||
|
|
||||||
@ -87,7 +87,7 @@ class BayesianOptimization:
|
|||||||
self.GP.fit(self.X, self.Y)
|
self.GP.fit(self.X, self.Y)
|
||||||
|
|
||||||
def next_observation(self):
|
def next_observation(self):
|
||||||
if self.acq == 'ei':
|
if self.acq == "Expected Improvement":
|
||||||
x_next = ExpectedImprovement(self.GP,
|
x_next = ExpectedImprovement(self.GP,
|
||||||
self.X,
|
self.X,
|
||||||
self.eval_X,
|
self.eval_X,
|
||||||
@ -99,7 +99,7 @@ class BayesianOptimization:
|
|||||||
|
|
||||||
return x_next
|
return x_next
|
||||||
|
|
||||||
elif self.acq == 'pi':
|
elif self.acq == "Probability of Improvement":
|
||||||
x_next = ProbabilityOfImprovement(self.GP,
|
x_next = ProbabilityOfImprovement(self.GP,
|
||||||
self.X,
|
self.X,
|
||||||
self.eval_X,
|
self.eval_X,
|
||||||
@ -111,7 +111,7 @@ class BayesianOptimization:
|
|||||||
|
|
||||||
return x_next
|
return x_next
|
||||||
|
|
||||||
elif self.acq == 'cb':
|
elif self.acq == "Upper Confidence Bound":
|
||||||
x_next = ConfidenceBound(self.GP,
|
x_next = ConfidenceBound(self.GP,
|
||||||
self.X,
|
self.X,
|
||||||
self.eval_X,
|
self.eval_X,
|
||||||
@ -137,7 +137,10 @@ class BayesianOptimization:
|
|||||||
|
|
||||||
self.GP.fit(self.X, self.Y)
|
self.GP.fit(self.X, self.Y)
|
||||||
|
|
||||||
self.best_reward = np.vstack((self.best_reward, max(self.Y)))
|
if self.episode == 0:
|
||||||
|
self.best_reward[0] = max(self.Y)
|
||||||
|
else:
|
||||||
|
self.best_reward = np.vstack((self.best_reward, max(self.Y)))
|
||||||
|
|
||||||
self.episode += 1
|
self.episode += 1
|
||||||
return step_count
|
return step_count
|
||||||
@ -148,8 +151,6 @@ class BayesianOptimization:
|
|||||||
x_max = self.X[idx, :]
|
x_max = self.X[idx, :]
|
||||||
|
|
||||||
self.policy_model.weights = x_max
|
self.policy_model.weights = x_max
|
||||||
|
best_policy = self.policy_model.rollout().reshape(-1,)
|
||||||
|
|
||||||
return self.policy_model.rollout(), y_hat[idx]
|
return best_policy, y_hat[idx], x_max
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,4 +1,6 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
class GaussianRBF:
|
class GaussianRBF:
|
||||||
def __init__(self, nr_weights, nr_steps, seed=None, lowerb=-1.0, upperb=1.0):
|
def __init__(self, nr_weights, nr_steps, seed=None, lowerb=-1.0, upperb=1.0):
|
||||||
self.nr_weights = nr_weights
|
self.nr_weights = nr_weights
|
||||||
@ -33,11 +35,13 @@ class GaussianRBF:
|
|||||||
|
|
||||||
return self.policy
|
return self.policy
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
policy = GaussianRBFModel(1, 50)
|
policy = GaussianRBF(1, 50)
|
||||||
policy.random_policy()
|
policy.random_policy()
|
||||||
policy.policy_rollout()
|
policy.rollout()
|
||||||
print(policy.weights)
|
print(policy.weights)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
@ -8,6 +8,7 @@ from active_bo_ros.ReinforcementLearning.ContinuousMountainCar import Continuous
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
class BOService(Node):
|
class BOService(Node):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__('bo_service')
|
super().__init__('bo_service')
|
||||||
@ -19,15 +20,18 @@ class BOService(Node):
|
|||||||
self.nr_init = 3
|
self.nr_init = 3
|
||||||
|
|
||||||
def bo_callback(self, request, response):
|
def bo_callback(self, request, response):
|
||||||
|
self.get_logger().info('Bayesian Optimization Service started!')
|
||||||
nr_weights = request.nr_weights
|
nr_weights = request.nr_weights
|
||||||
max_steps = request.steps
|
max_steps = request.max_steps
|
||||||
nr_episodes = request.nr_episodes
|
nr_episodes = request.nr_episodes
|
||||||
nr_runs = request.nr_runs
|
nr_runs = request.nr_runs
|
||||||
acq = request.acquisition_function
|
acq = request.acquisition_function
|
||||||
|
self.get_logger().info(acq)
|
||||||
|
|
||||||
reward = np.zeros((nr_episodes, nr_runs))
|
reward = np.zeros((nr_episodes, nr_runs))
|
||||||
best_pol_reward = np.zeros((nr_runs, 1))
|
best_pol_reward = np.zeros((1, nr_runs))
|
||||||
best_policy = np.zeros((max_steps, nr_runs))
|
best_policy = np.zeros((max_steps, nr_runs))
|
||||||
|
best_weights = np.zeros((nr_weights, nr_runs))
|
||||||
|
|
||||||
BO = BayesianOptimization(self.env,
|
BO = BayesianOptimization(self.env,
|
||||||
max_steps,
|
max_steps,
|
||||||
@ -42,14 +46,16 @@ class BOService(Node):
|
|||||||
x_next = BO.next_observation()
|
x_next = BO.next_observation()
|
||||||
BO.eval_new_observation(x_next)
|
BO.eval_new_observation(x_next)
|
||||||
|
|
||||||
best_policy[:, i], best_pol_reward[:, i] = BO.get_best_result()
|
best_policy[:, i], best_pol_reward[:, i], best_weights[:, i] = BO.get_best_result()
|
||||||
|
|
||||||
reward[:, i] = BO.best_reward.T
|
reward[:, i] = BO.best_reward.T
|
||||||
|
|
||||||
response.reward_mean = np.mean(reward, axis=1)
|
response.reward_mean = np.mean(reward, axis=1).tolist()
|
||||||
response.reward_std = np.std(reward, axis=1)
|
response.reward_std = np.std(reward, axis=1).tolist()
|
||||||
|
|
||||||
best_policy_idx = np.argmax(best_pol_reward)
|
best_policy_idx = np.argmax(best_pol_reward)
|
||||||
response.best_policy = best_policy[:, best_policy_idx]
|
response.best_weights = best_weights[:, best_policy_idx].tolist()
|
||||||
|
response.best_policy = best_policy[:, best_policy_idx].tolist()
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
|
||||||
@ -60,5 +66,6 @@ def main(args=None):
|
|||||||
|
|
||||||
rclpy.spin(bo_service)
|
rclpy.spin(bo_service)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
Loading…
Reference in New Issue
Block a user