From 1416e72675e1f95fa0ab1b1ad1b0ec631ce55ee5 Mon Sep 17 00:00:00 2001
From: Niko
Date: Wed, 8 Mar 2023 16:27:14 +0100
Subject: [PATCH] finished BO Manual case and BO fully functional

---
 src/active_bo_msgs/srv/BO.srv                 |  1 +
 .../BayesianOptimization.py                   | 19 ++++++++++---------
 .../PolicyModel/GaussianRBFModel.py           | 10 +++++++---
 src/active_bo_ros/active_bo_ros/bo_service.py | 19 +++++++++++++------
 4 files changed, 31 insertions(+), 18 deletions(-)

diff --git a/src/active_bo_msgs/srv/BO.srv b/src/active_bo_msgs/srv/BO.srv
index b8e1cd1..3b7b03c 100644
--- a/src/active_bo_msgs/srv/BO.srv
+++ b/src/active_bo_msgs/srv/BO.srv
@@ -5,5 +5,6 @@ uint16 nr_runs
 string acquisition_function
 ---
 float32[] best_policy
+float32[] best_weights
 float32[] reward_mean
 float32[] reward_std
\ No newline at end of file
diff --git a/src/active_bo_ros/active_bo_ros/BayesianOptimization/BayesianOptimization.py b/src/active_bo_ros/active_bo_ros/BayesianOptimization/BayesianOptimization.py
index db1e211..890ead1 100644
--- a/src/active_bo_ros/active_bo_ros/BayesianOptimization/BayesianOptimization.py
+++ b/src/active_bo_ros/active_bo_ros/BayesianOptimization/BayesianOptimization.py
@@ -7,6 +7,7 @@ from active_bo_ros.AcquisitionFunctions.ExpectedImprovement import ExpectedImpro
 from active_bo_ros.AcquisitionFunctions.ProbabilityOfImprovement import ProbabilityOfImprovement
 from active_bo_ros.AcquisitionFunctions.ConfidenceBound import ConfidenceBound
 
+
 class BayesianOptimization:
     def __init__(self, env, nr_steps, nr_init=3, acq='ei', nr_weights=6, policy_seed=None):
         self.env = env
@@ -43,7 +44,6 @@ class BayesianOptimization:
         self.best_reward = np.empty((1, 1))
 
     def runner(self, policy):
-        done = False
         env_reward = 0.0
         step_count = 0
 
@@ -87,7 +87,7 @@ class BayesianOptimization:
         self.GP.fit(self.X, self.Y)
 
     def next_observation(self):
-        if self.acq == 'ei':
+        if self.acq == "Expected Improvement":
             x_next = ExpectedImprovement(self.GP,
                                          self.X,
                                          self.eval_X,
@@ -99,7 +99,7 @@ class BayesianOptimization:
 
             return x_next
 
-        elif self.acq == 'pi':
+        elif self.acq == "Probability of Improvement":
             x_next = ProbabilityOfImprovement(self.GP,
                                               self.X,
                                               self.eval_X,
@@ -111,7 +111,7 @@ class BayesianOptimization:
 
             return x_next
 
-        elif self.acq == 'cb':
+        elif self.acq == "Upper Confidence Bound":
             x_next = ConfidenceBound(self.GP,
                                      self.X,
                                      self.eval_X,
@@ -137,7 +137,10 @@ class BayesianOptimization:
 
         self.GP.fit(self.X, self.Y)
 
-        self.best_reward = np.vstack((self.best_reward, max(self.Y)))
+        if self.episode == 0:
+            self.best_reward[0] = max(self.Y)
+        else:
+            self.best_reward = np.vstack((self.best_reward, max(self.Y)))
 
         self.episode += 1
         return step_count
@@ -148,8 +151,6 @@
         x_max = self.X[idx, :]
 
         self.policy_model.weights = x_max
+        best_policy = self.policy_model.rollout().reshape(-1,)
 
-        return self.policy_model.rollout(), y_hat[idx]
-
-
-
+        return best_policy, y_hat[idx], x_max
diff --git a/src/active_bo_ros/active_bo_ros/PolicyModel/GaussianRBFModel.py b/src/active_bo_ros/active_bo_ros/PolicyModel/GaussianRBFModel.py
index b19d360..b517fb9 100644
--- a/src/active_bo_ros/active_bo_ros/PolicyModel/GaussianRBFModel.py
+++ b/src/active_bo_ros/active_bo_ros/PolicyModel/GaussianRBFModel.py
@@ -1,4 +1,6 @@
 import numpy as np
+
+
 class GaussianRBF:
     def __init__(self, nr_weights, nr_steps, seed=None, lowerb=-1.0, upperb=1.0):
         self.nr_weights = nr_weights
@@ -33,11 +35,13 @@ class GaussianRBF:
 
         return self.policy
 
+
 def main():
-    policy = GaussianRBFModel(1, 50)
+    policy = GaussianRBF(1, 50)
     policy.random_policy()
-    policy.policy_rollout()
+    policy.rollout()
     print(policy.weights)
 
+
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/src/active_bo_ros/active_bo_ros/bo_service.py b/src/active_bo_ros/active_bo_ros/bo_service.py
index 1f490bc..f598470 100644
--- a/src/active_bo_ros/active_bo_ros/bo_service.py
+++ b/src/active_bo_ros/active_bo_ros/bo_service.py
@@ -8,6 +8,7 @@ from active_bo_ros.ReinforcementLearning.ContinuousMountainCar import Continuous
 
 import numpy as np
 
+
 class BOService(Node):
     def __init__(self):
         super().__init__('bo_service')
@@ -19,15 +20,18 @@ class BOService(Node):
         self.nr_init = 3
 
     def bo_callback(self, request, response):
+        self.get_logger().info('Bayesian Optimization Service started!')
         nr_weights = request.nr_weights
-        max_steps = request.steps
+        max_steps = request.max_steps
         nr_episodes = request.nr_episodes
         nr_runs = request.nr_runs
         acq = request.acquisition_function
+        self.get_logger().info(acq)
 
         reward = np.zeros((nr_episodes, nr_runs))
-        best_pol_reward = np.zeros((nr_runs, 1))
+        best_pol_reward = np.zeros((1, nr_runs))
         best_policy = np.zeros((max_steps, nr_runs))
+        best_weights = np.zeros((nr_weights, nr_runs))
 
         BO = BayesianOptimization(self.env,
                                   max_steps,
@@ -42,14 +46,16 @@ class BOService(Node):
                 x_next = BO.next_observation()
                 BO.eval_new_observation(x_next)
 
-            best_policy[:, i], best_pol_reward[:, i] = BO.get_best_result()
+            best_policy[:, i], best_pol_reward[:, i], best_weights[:, i] = BO.get_best_result()
+
             reward[:, i] = BO.best_reward.T
 
-        response.reward_mean = np.mean(reward, axis=1)
-        response.reward_std = np.std(reward, axis=1)
+        response.reward_mean = np.mean(reward, axis=1).tolist()
+        response.reward_std = np.std(reward, axis=1).tolist()
 
         best_policy_idx = np.argmax(best_pol_reward)
-        response.best_policy = best_policy[:, best_policy_idx]
+        response.best_weights = best_weights[:, best_policy_idx].tolist()
+        response.best_policy = best_policy[:, best_policy_idx].tolist()
 
         return response
 
@@ -60,5 +66,6 @@ def main(args=None):
 
     rclpy.spin(bo_service)
 
+
 if __name__ == '__main__':
     main()
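
For reference, a minimal client sketch against the updated BO.srv interface follows. The request/response field names, the acquisition-function strings, and the node name 'bo_service' are taken from this patch; the service name 'bo_srv' and the example request values are assumptions and may need to match the name actually passed to create_service() in bo_service.py.

# Minimal sketch (not part of the patch): calling the BO service with the
# interface introduced above. The service name 'bo_srv' is an assumption.
import rclpy
from rclpy.node import Node

from active_bo_msgs.srv import BO


class BOClient(Node):
    def __init__(self):
        super().__init__('bo_client')
        # assumption: the service is advertised as 'bo_srv'
        self.client = self.create_client(BO, 'bo_srv')
        while not self.client.wait_for_service(timeout_sec=1.0):
            self.get_logger().info('waiting for the BO service...')

    def send_request(self):
        request = BO.Request()
        request.nr_weights = 6
        request.max_steps = 100
        request.nr_episodes = 20
        request.nr_runs = 5
        # must match the strings checked in BayesianOptimization.next_observation()
        request.acquisition_function = 'Expected Improvement'
        return self.client.call_async(request)


def main(args=None):
    rclpy.init(args=args)
    client = BOClient()
    future = client.send_request()
    rclpy.spin_until_future_complete(client, future)
    response = future.result()
    client.get_logger().info(f'best weights: {list(response.best_weights)}')
    client.get_logger().info(f'mean reward per episode: {list(response.reward_mean)}')
    client.destroy_node()
    rclpy.shutdown()


if __name__ == '__main__':
    main()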