diff --git a/src/active_bo_msgs/CMakeLists.txt b/src/active_bo_msgs/CMakeLists.txt index 7d944d2..1e5e951 100644 --- a/src/active_bo_msgs/CMakeLists.txt +++ b/src/active_bo_msgs/CMakeLists.txt @@ -23,6 +23,7 @@ rosidl_generate_interfaces(${PROJECT_NAME} "srv/WeightToPolicy.srv" "srv/RLRollOut.srv" "srv/BO.srv" + "srv/ActiveBO.srv" "msg/ImageFeedback.msg" ) diff --git a/src/active_bo_msgs/srv/ActiveBO.srv b/src/active_bo_msgs/srv/ActiveBO.srv new file mode 100644 index 0000000..c76746b --- /dev/null +++ b/src/active_bo_msgs/srv/ActiveBO.srv @@ -0,0 +1,11 @@ +uint16 nr_weights +uint16 max_steps +uint16 nr_episodes +uint16 nr_runs +string acquisition_function +uint16 epsilon +--- +float32[] best_policy +float32[] best_weights +float32[] reward_mean +float32[] reward_std \ No newline at end of file diff --git a/src/active_bo_ros/active_bo_ros/AcquisitionFunctions/ConfidenceBound.py b/src/active_bo_ros/active_bo_ros/AcquisitionFunctions/ConfidenceBound.py index 48fd5b2..9408bca 100644 --- a/src/active_bo_ros/active_bo_ros/AcquisitionFunctions/ConfidenceBound.py +++ b/src/active_bo_ros/active_bo_ros/AcquisitionFunctions/ConfidenceBound.py @@ -1,5 +1,6 @@ import numpy as np + def ConfidenceBound(gp, X, nr_test, nr_weights, lam=1.2, seed=None, lower=-1.0, upper=1.0): y_hat = gp.predict(X) best_y = max(y_hat) diff --git a/src/active_bo_ros/active_bo_ros/AcquisitionFunctions/ExpectedImprovement.py b/src/active_bo_ros/active_bo_ros/AcquisitionFunctions/ExpectedImprovement.py index 6b17a06..4fedb1c 100644 --- a/src/active_bo_ros/active_bo_ros/AcquisitionFunctions/ExpectedImprovement.py +++ b/src/active_bo_ros/active_bo_ros/AcquisitionFunctions/ExpectedImprovement.py @@ -1,6 +1,7 @@ import numpy as np from scipy.stats import norm + def ExpectedImprovement(gp, X, nr_test, nr_weights, kappa=2.576, seed=None, lower=-1.0, upper=1.0): y_hat = gp.predict(X) best_y = max(y_hat) diff --git a/src/active_bo_ros/active_bo_ros/AcquisitionFunctions/ProbabilityOfImprovement.py 
class ActiveBOService(Node):
    """ROS 2 node that serves ``ActiveBO`` requests on ``active_bo_srv``.

    For every request the node performs ``nr_runs`` runs of ``nr_episodes``
    episodes each with a ``BayesianOptimization`` instance built around a
    ``Continuous_MountainCarEnv``. The response carries the per-episode mean
    and standard deviation of the reward across runs, plus the policy and
    weights of the run whose best-policy reward is highest.
    """

    def __init__(self):
        # NOTE(review): the node name looks like a typo of 'active_bo_service'.
        # It is kept unchanged because the name is visible in the ROS graph and
        # other files (launch/parameter files) may refer to it -- confirm.
        super().__init__('active_bo_servie')
        self.srv = self.create_service(ActiveBO, 'active_bo_srv', self.active_bo_callback)

        self.env = Continuous_MountainCarEnv()
        self.distance_penalty = 0

        # Forwarded to BayesianOptimization as ``nr_init``.
        self.nr_init = 3

    def active_bo_callback(self, request, response):
        """Handle one ``ActiveBO`` request and fill in the response.

        Args:
            request: ``ActiveBO`` request; the fields read are ``nr_weights``,
                ``max_steps``, ``nr_episodes``, ``nr_runs``,
                ``acquisition_function`` and ``epsilon``.
            response: ``ActiveBO`` response object to populate.

        Returns:
            The same ``response`` with ``reward_mean``, ``reward_std``,
            ``best_weights`` and ``best_policy`` set. All four are empty lists
            when ``nr_runs`` is 0.
        """
        self.get_logger().info('Active Bayesian Optimization Service started!')
        nr_weights = request.nr_weights
        max_steps = request.max_steps
        nr_episodes = request.nr_episodes
        nr_runs = request.nr_runs
        acq = request.acquisition_function
        epsilon = request.epsilon

        if nr_runs == 0:
            # Without runs there is nothing to aggregate: np.argmax below would
            # raise on an empty array and the exception would escape the
            # service callback. Answer with an explicitly empty result instead.
            self.get_logger().warning('nr_runs is 0, returning an empty result.')
            response.reward_mean = []
            response.reward_std = []
            response.best_weights = []
            response.best_policy = []
            return response

        # One column per run.
        reward = np.zeros((nr_episodes, nr_runs))
        best_pol_reward = np.zeros((1, nr_runs))
        best_policy = np.zeros((max_steps, nr_runs))
        best_weights = np.zeros((nr_weights, nr_runs))

        BO = BayesianOptimization(self.env,
                                  max_steps,
                                  nr_init=self.nr_init,
                                  acq=acq,
                                  nr_weights=nr_weights)

        for i in range(nr_runs):
            BO.initialize()

            for _ in range(nr_episodes):
                # Scalar draw from [0, 1); a size-1 array is not needed for a
                # single comparison.
                # NOTE(review): ActiveBO.srv declares ``epsilon`` as uint16, so
                # this test is never true for 0 and always true for any value
                # >= 1. If epsilon is meant to be a probability the interface
                # field should probably be a float -- confirm.
                if np.random.uniform(0.0, 1.0) < epsilon:
                    # active part
                    # TODO: not implemented yet; no observation is evaluated
                    # for this episode.
                    # NOTE(review): if BO.best_reward gains one entry per
                    # evaluated observation, skipping here presumably leaves it
                    # shorter than nr_episodes and the assignment to
                    # ``reward[:, i]`` below would fail -- confirm.
                    pass
                else:
                    # BO part
                    x_next = BO.next_observation()
                    BO.eval_new_observation(x_next)

            best_policy[:, i], best_pol_reward[:, i], best_weights[:, i] = BO.get_best_result()

            reward[:, i] = BO.best_reward.T

        # Statistics across runs (axis 1), one value per episode.
        response.reward_mean = np.mean(reward, axis=1).tolist()
        response.reward_std = np.std(reward, axis=1).tolist()

        # best_pol_reward has a single row, so the flat argmax is the run index.
        best_policy_idx = np.argmax(best_pol_reward)
        response.best_weights = best_weights[:, best_policy_idx].tolist()
        response.best_policy = best_policy[:, best_policy_idx].tolist()
        return response


def main(args=None):
    """Initialise rclpy, spin an ``ActiveBOService`` node and clean up on exit.

    Args:
        args: Command-line arguments forwarded to ``rclpy.init``.
    """
    rclpy.init(args=args)

    active_bo_service = ActiveBOService()

    try:
        rclpy.spin(active_bo_service)
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the node; exit without a traceback.
        pass
    finally:
        active_bo_service.destroy_node()
        # The context may already have been shut down (e.g. by a signal
        # handler); shutting it down a second time would raise.
        if rclpy.ok():
            rclpy.shutdown()


if __name__ == '__main__':
    main()
def generate_launch_description():
    """Return a launch description that includes the rosbridge XML launch file.

    The included file is ``launch/rosbridge_websocket_launch.xml``, resolved
    relative to the share directory of the ``rosbridge_server`` package.
    """
    # Resolve the installed location of the rosbridge_server package.
    share_dir = get_package_share_directory('rosbridge_server')
    xml_launch_file = os.path.join(share_dir, 'launch/rosbridge_websocket_launch.xml')

    # Wrap the XML launch file so it can be included from this Python launch file.
    include_rosbridge = IncludeLaunchDescription(
        XMLLaunchDescriptionSource(xml_launch_file)
    )

    launch_description = LaunchDescription([include_rosbridge])
    return launch_description