finished BO

Manual case and BO fully functional
Niko Feith 2023-03-08 16:27:14 +01:00
parent 33764e1ac3
commit 1416e72675
4 changed files with 31 additions and 18 deletions

View File

@@ -5,5 +5,6 @@ uint16 nr_runs
 string acquisition_function
 ---
 float32[] best_policy
+float32[] best_weights
 float32[] reward_mean
 float32[] reward_std

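For orientation, the full service definition after this change would look roughly like the sketch below. The request fields are inferred from how the service node reads the request further down in this commit (nr_weights, max_steps, nr_episodes, nr_runs, acquisition_function); the field types other than nr_runs are assumptions, since the hunk above only shows the tail of the request and the response.

# Assumed layout of the BO service file after this commit (request types guessed)
uint16 nr_weights
uint16 max_steps
uint16 nr_episodes
uint16 nr_runs
string acquisition_function
---
float32[] best_policy
float32[] best_weights
float32[] reward_mean
float32[] reward_std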
View File

@@ -7,6 +7,7 @@ from active_bo_ros.AcquisitionFunctions.ExpectedImprovement import ExpectedImprovement
 from active_bo_ros.AcquisitionFunctions.ProbabilityOfImprovement import ProbabilityOfImprovement
 from active_bo_ros.AcquisitionFunctions.ConfidenceBound import ConfidenceBound
 
 class BayesianOptimization:
     def __init__(self, env, nr_steps, nr_init=3, acq='ei', nr_weights=6, policy_seed=None):
         self.env = env
@@ -43,7 +44,6 @@ class BayesianOptimization:
         self.best_reward = np.empty((1, 1))
 
     def runner(self, policy):
-        done = False
         env_reward = 0.0
         step_count = 0
@@ -87,7 +87,7 @@ class BayesianOptimization:
         self.GP.fit(self.X, self.Y)
 
     def next_observation(self):
-        if self.acq == 'ei':
+        if self.acq == "Expected Improvement":
             x_next = ExpectedImprovement(self.GP,
                                          self.X,
                                          self.eval_X,
@@ -99,7 +99,7 @@ class BayesianOptimization:
             return x_next
-        elif self.acq == 'pi':
+        elif self.acq == "Probability of Improvement":
             x_next = ProbabilityOfImprovement(self.GP,
                                               self.X,
                                               self.eval_X,
@@ -111,7 +111,7 @@ class BayesianOptimization:
             return x_next
-        elif self.acq == 'cb':
+        elif self.acq == "Upper Confidence Bound":
             x_next = ConfidenceBound(self.GP,
                                      self.X,
                                      self.eval_X,
@@ -137,6 +137,9 @@ class BayesianOptimization:
         self.GP.fit(self.X, self.Y)
 
-        self.best_reward = np.vstack((self.best_reward, max(self.Y)))
+        if self.episode == 0:
+            self.best_reward[0] = max(self.Y)
+        else:
+            self.best_reward = np.vstack((self.best_reward, max(self.Y)))
 
         self.episode += 1
@@ -148,8 +151,6 @@ class BayesianOptimization:
         x_max = self.X[idx, :]
         self.policy_model.weights = x_max
+        best_policy = self.policy_model.rollout().reshape(-1,)
 
-        return self.policy_model.rollout(), y_hat[idx]
+        return best_policy, y_hat[idx], x_max

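The episode check added above fixes the best-reward bookkeeping: self.best_reward starts as np.empty((1, 1)), so unconditionally stacking max(self.Y) onto it leaves an uninitialised value in the first row. A minimal stand-alone sketch of the intended behaviour (names follow the diff; the surrounding class and GP code are omitted):

import numpy as np

best_reward = np.empty((1, 1))          # placeholder row, contents are uninitialised

for episode, current_best in enumerate([0.3, 0.5, 0.4]):
    if episode == 0:
        best_reward[0] = current_best   # overwrite the placeholder in episode 0
    else:
        best_reward = np.vstack((best_reward, current_best))  # append later episodes

print(best_reward.ravel())              # [0.3 0.5 0.4] -- no stale first entry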
View File

@@ -1,4 +1,6 @@
 import numpy as np
 
 class GaussianRBF:
     def __init__(self, nr_weights, nr_steps, seed=None, lowerb=-1.0, upperb=1.0):
         self.nr_weights = nr_weights
@@ -33,11 +35,13 @@ class GaussianRBF:
         return self.policy
 
 def main():
-    policy = GaussianRBFModel(1, 50)
+    policy = GaussianRBF(1, 50)
     policy.random_policy()
-    policy.policy_rollout()
+    policy.rollout()
     print(policy.weights)
 
 if __name__ == "__main__":
     main()

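For readers unfamiliar with the policy class touched above: a Gaussian RBF policy of this kind typically places nr_weights radial basis functions along the nr_steps time axis and returns their weighted sum as the action trajectory. The class body is not part of this diff, so the following is a hedged sketch of the general technique, not the repository's implementation; only the constructor signature and the random_policy()/rollout() names come from the hunks above.

import numpy as np

# Stand-alone sketch (assumed behaviour) of an open-loop Gaussian-RBF policy,
# mirroring the constructor signature shown in the diff.
class GaussianRBFSketch:
    def __init__(self, nr_weights, nr_steps, seed=None, lowerb=-1.0, upperb=1.0):
        self.nr_weights = nr_weights
        self.nr_steps = nr_steps
        self.lowerb = lowerb
        self.upperb = upperb
        self.rng = np.random.default_rng(seed)
        self.weights = np.zeros(nr_weights)
        self.centers = np.linspace(0, nr_steps - 1, nr_weights)  # evenly spaced centres
        self.width = nr_steps / nr_weights                       # shared bandwidth

    def random_policy(self):
        # sample weights uniformly inside the bounds
        self.weights = self.rng.uniform(self.lowerb, self.upperb, self.nr_weights)

    def rollout(self):
        # policy(t) = sum_i w_i * exp(-(t - c_i)^2 / (2 * width^2))
        t = np.arange(self.nr_steps)[:, None]
        phi = np.exp(-(t - self.centers) ** 2 / (2 * self.width ** 2))
        self.policy = phi @ self.weights
        return self.policy

policy = GaussianRBFSketch(6, 50, seed=0)
policy.random_policy()
print(policy.rollout().shape)   # (50,)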
View File

@@ -8,6 +8,7 @@ from active_bo_ros.ReinforcementLearning.ContinuousMountainCar import Continuous
 import numpy as np
 
 class BOService(Node):
     def __init__(self):
         super().__init__('bo_service')
@@ -19,15 +20,18 @@ class BOService(Node):
         self.nr_init = 3
 
     def bo_callback(self, request, response):
+        self.get_logger().info('Bayesian Optimization Service started!')
         nr_weights = request.nr_weights
-        max_steps = request.steps
+        max_steps = request.max_steps
         nr_episodes = request.nr_episodes
         nr_runs = request.nr_runs
         acq = request.acquisition_function
+        self.get_logger().info(acq)
 
         reward = np.zeros((nr_episodes, nr_runs))
-        best_pol_reward = np.zeros((nr_runs, 1))
+        best_pol_reward = np.zeros((1, nr_runs))
         best_policy = np.zeros((max_steps, nr_runs))
+        best_weights = np.zeros((nr_weights, nr_runs))
 
         BO = BayesianOptimization(self.env,
                                   max_steps,
@@ -42,14 +46,16 @@ class BOService(Node):
             x_next = BO.next_observation()
             BO.eval_new_observation(x_next)
 
-            best_policy[:, i], best_pol_reward[:, i] = BO.get_best_result()
+            best_policy[:, i], best_pol_reward[:, i], best_weights[:, i] = BO.get_best_result()
             reward[:, i] = BO.best_reward.T
 
-        response.reward_mean = np.mean(reward, axis=1)
-        response.reward_std = np.std(reward, axis=1)
+        response.reward_mean = np.mean(reward, axis=1).tolist()
+        response.reward_std = np.std(reward, axis=1).tolist()
 
         best_policy_idx = np.argmax(best_pol_reward)
-        response.best_policy = best_policy[:, best_policy_idx]
+        response.best_weights = best_weights[:, best_policy_idx].tolist()
+        response.best_policy = best_policy[:, best_policy_idx].tolist()
 
         return response
@@ -60,5 +66,6 @@ def main(args=None):
     rclpy.spin(bo_service)
 
 if __name__ == '__main__':
     main()
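
As a usage sketch, a client of this service now passes the human-readable acquisition-function names ('Expected Improvement', 'Probability of Improvement', 'Upper Confidence Bound') introduced in BayesianOptimization and can read the new best_weights field from the response. The interface package, service type and service name below (active_bo_msgs, BO, 'bo_srv') are placeholders for illustration; they are not shown in this diff.

# Hypothetical client sketch -- interface package, service type and service name are assumptions.
import rclpy
from rclpy.node import Node
from active_bo_msgs.srv import BO   # placeholder import, adjust to the real interface

def main():
    rclpy.init()
    node = Node('bo_client')
    client = node.create_client(BO, 'bo_srv')
    client.wait_for_service()

    request = BO.Request()
    request.nr_weights = 6
    request.max_steps = 100
    request.nr_episodes = 20
    request.nr_runs = 5
    request.acquisition_function = 'Expected Improvement'   # new string format

    future = client.call_async(request)
    rclpy.spin_until_future_complete(node, future)
    response = future.result()

    print('best weights:', list(response.best_weights))     # field added in this commit
    print('mean reward per episode:', list(response.reward_mean))

    node.destroy_node()
    rclpy.shutdown()

if __name__ == '__main__':
    main()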