diff --git a/src/active_bo_msgs/srv/BO.srv b/src/active_bo_msgs/srv/BO.srv
index b8e1cd1..3b7b03c 100644
--- a/src/active_bo_msgs/srv/BO.srv
+++ b/src/active_bo_msgs/srv/BO.srv
@@ -5,5 +5,6 @@ uint16 nr_runs
 string acquisition_function
 ---
 float32[] best_policy
+float32[] best_weights
 float32[] reward_mean
 float32[] reward_std
\ No newline at end of file
diff --git a/src/active_bo_ros/active_bo_ros/BayesianOptimization/BayesianOptimization.py b/src/active_bo_ros/active_bo_ros/BayesianOptimization/BayesianOptimization.py
index db1e211..890ead1 100644
--- a/src/active_bo_ros/active_bo_ros/BayesianOptimization/BayesianOptimization.py
+++ b/src/active_bo_ros/active_bo_ros/BayesianOptimization/BayesianOptimization.py
@@ -7,6 +7,7 @@ from active_bo_ros.AcquisitionFunctions.ExpectedImprovement import ExpectedImpro
 from active_bo_ros.AcquisitionFunctions.ProbabilityOfImprovement import ProbabilityOfImprovement
 from active_bo_ros.AcquisitionFunctions.ConfidenceBound import ConfidenceBound
 
+
 class BayesianOptimization:
     def __init__(self, env, nr_steps, nr_init=3, acq='ei', nr_weights=6, policy_seed=None):
         self.env = env
@@ -43,7 +44,6 @@ class BayesianOptimization:
         self.best_reward = np.empty((1, 1))
 
     def runner(self, policy):
-        done = False
         env_reward = 0.0
         step_count = 0
 
@@ -87,7 +87,7 @@ class BayesianOptimization:
         self.GP.fit(self.X, self.Y)
 
     def next_observation(self):
-        if self.acq == 'ei':
+        if self.acq == "Expected Improvement":
             x_next = ExpectedImprovement(self.GP,
                                          self.X,
                                          self.eval_X,
@@ -99,7 +99,7 @@ class BayesianOptimization:
 
             return x_next
 
-        elif self.acq == 'pi':
+        elif self.acq == "Probability of Improvement":
             x_next = ProbabilityOfImprovement(self.GP,
                                               self.X,
                                               self.eval_X,
@@ -111,7 +111,7 @@ class BayesianOptimization:
 
             return x_next
 
-        elif self.acq == 'cb':
+        elif self.acq == "Upper Confidence Bound":
             x_next = ConfidenceBound(self.GP,
                                      self.X,
                                      self.eval_X,
@@ -137,7 +137,10 @@ class BayesianOptimization:
 
         self.GP.fit(self.X, self.Y)
 
-        self.best_reward = np.vstack((self.best_reward, max(self.Y)))
+        if self.episode == 0:
+            self.best_reward[0] = max(self.Y)
+        else:
+            self.best_reward = np.vstack((self.best_reward, max(self.Y)))
 
         self.episode += 1
         return step_count
@@ -148,8 +151,6 @@ class BayesianOptimization:
         x_max = self.X[idx, :]
 
         self.policy_model.weights = x_max
+        best_policy = self.policy_model.rollout().reshape(-1,)
 
-        return self.policy_model.rollout(), y_hat[idx]
-
-
-
+        return best_policy, y_hat[idx], x_max
diff --git a/src/active_bo_ros/active_bo_ros/PolicyModel/GaussianRBFModel.py b/src/active_bo_ros/active_bo_ros/PolicyModel/GaussianRBFModel.py
index b19d360..b517fb9 100644
--- a/src/active_bo_ros/active_bo_ros/PolicyModel/GaussianRBFModel.py
+++ b/src/active_bo_ros/active_bo_ros/PolicyModel/GaussianRBFModel.py
@@ -1,4 +1,6 @@
 import numpy as np
+
+
 class GaussianRBF:
     def __init__(self, nr_weights, nr_steps, seed=None, lowerb=-1.0, upperb=1.0):
         self.nr_weights = nr_weights
@@ -33,11 +35,13 @@ class GaussianRBF:
 
         return self.policy
 
+
 def main():
-    policy = GaussianRBFModel(1, 50)
+    policy = GaussianRBF(1, 50)
     policy.random_policy()
-    policy.policy_rollout()
+    policy.rollout()
     print(policy.weights)
 
+
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/src/active_bo_ros/active_bo_ros/bo_service.py b/src/active_bo_ros/active_bo_ros/bo_service.py
index 1f490bc..f598470 100644
--- a/src/active_bo_ros/active_bo_ros/bo_service.py
+++ b/src/active_bo_ros/active_bo_ros/bo_service.py
@@ -8,6 +8,7 @@ from active_bo_ros.ReinforcementLearning.ContinuousMountainCar import Continuous
 
 import numpy as np
 
+
 class BOService(Node):
     def __init__(self):
         super().__init__('bo_service')
@@ -19,15 +20,18 @@ class BOService(Node):
         self.nr_init = 3
 
     def bo_callback(self, request, response):
+        self.get_logger().info('Bayesian Optimization Service started!')
         nr_weights = request.nr_weights
-        max_steps = request.steps
+        max_steps = request.max_steps
         nr_episodes = request.nr_episodes
         nr_runs = request.nr_runs
         acq = request.acquisition_function
+        self.get_logger().info(acq)
 
         reward = np.zeros((nr_episodes, nr_runs))
-        best_pol_reward = np.zeros((nr_runs, 1))
+        best_pol_reward = np.zeros((1, nr_runs))
         best_policy = np.zeros((max_steps, nr_runs))
+        best_weights = np.zeros((nr_weights, nr_runs))
 
         BO = BayesianOptimization(self.env,
                                   max_steps,
@@ -42,14 +46,16 @@ class BOService(Node):
                 x_next = BO.next_observation()
                 BO.eval_new_observation(x_next)
 
-            best_policy[:, i], best_pol_reward[:, i] = BO.get_best_result()
+            best_policy[:, i], best_pol_reward[:, i], best_weights[:, i] = BO.get_best_result()
+
             reward[:, i] = BO.best_reward.T
 
-        response.reward_mean = np.mean(reward, axis=1)
-        response.reward_std = np.std(reward, axis=1)
+        response.reward_mean = np.mean(reward, axis=1).tolist()
+        response.reward_std = np.std(reward, axis=1).tolist()
 
         best_policy_idx = np.argmax(best_pol_reward)
-        response.best_policy = best_policy[:, best_policy_idx]
+        response.best_weights = best_weights[:, best_policy_idx].tolist()
+        response.best_policy = best_policy[:, best_policy_idx].tolist()
 
         return response
 
@@ -60,5 +66,6 @@ def main(args=None):
 
     rclpy.spin(bo_service)
 
+
 if __name__ == '__main__':
     main()
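
Reviewer note: for reference, a minimal rclpy client sketch against the updated BO.srv interface, covering the renamed request field (max_steps instead of steps), the human-readable acquisition_function strings now matched in BayesianOptimization.next_observation(), and the new best_weights response array. This is an illustration only, not part of the change set: the service name 'bo_srv' is a guess (the create_service() call is outside this diff), the field values are arbitrary, and it assumes bo_callback forwards acquisition_function unchanged to BayesianOptimization.

    # bo_client_demo.py -- hypothetical usage sketch, not included in this PR
    import rclpy
    from rclpy.node import Node

    from active_bo_msgs.srv import BO


    def main():
        rclpy.init()
        node = Node('bo_client_demo')
        client = node.create_client(BO, 'bo_srv')  # service name is an assumption
        client.wait_for_service()

        request = BO.Request()
        request.nr_weights = 6
        request.max_steps = 100          # renamed from 'steps' in this change set
        request.nr_episodes = 20
        request.nr_runs = 5
        # One of: "Expected Improvement", "Probability of Improvement",
        # "Upper Confidence Bound" (see next_observation()).
        request.acquisition_function = 'Expected Improvement'

        future = client.call_async(request)
        rclpy.spin_until_future_complete(node, future)
        response = future.result()

        # best_weights is the response field added alongside best_policy.
        print('best weights:', list(response.best_weights))
        print('mean reward per episode:', list(response.reward_mean))

        node.destroy_node()
        rclpy.shutdown()


    if __name__ == '__main__':
        main()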