From a05e5af477b8ad9493608d0eb611615436470975 Mon Sep 17 00:00:00 2001
From: "nikolaus.feith"
Date: Tue, 7 Mar 2023 13:23:37 +0100
Subject: [PATCH] BO execute error

---
 src/active_bo_msgs/CMakeLists.txt             |  1 +
 src/active_bo_msgs/srv/BO.srv                 |  9 +++
 .../BayesianOptimization.py                   | 12 +++-
 src/active_bo_ros/active_bo_ros/bo_service.py | 64 +++++++++++++++++++
 src/active_bo_ros/launch/bo_service.launch.py | 11 ++++
 src/active_bo_ros/setup.py                    | 11 ++--
 6 files changed, 101 insertions(+), 7 deletions(-)
 create mode 100644 src/active_bo_msgs/srv/BO.srv
 mode change 100644 => 100755 src/active_bo_ros/launch/bo_service.launch.py

diff --git a/src/active_bo_msgs/CMakeLists.txt b/src/active_bo_msgs/CMakeLists.txt
index a80abf8..7d944d2 100644
--- a/src/active_bo_msgs/CMakeLists.txt
+++ b/src/active_bo_msgs/CMakeLists.txt
@@ -22,6 +22,7 @@ find_package(rosidl_default_generators REQUIRED)
 
 rosidl_generate_interfaces(${PROJECT_NAME}
   "srv/WeightToPolicy.srv"
   "srv/RLRollOut.srv"
+  "srv/BO.srv"
   "msg/ImageFeedback.msg"
 )
diff --git a/src/active_bo_msgs/srv/BO.srv b/src/active_bo_msgs/srv/BO.srv
new file mode 100644
index 0000000..b8e1cd1
--- /dev/null
+++ b/src/active_bo_msgs/srv/BO.srv
@@ -0,0 +1,9 @@
+uint16 nr_weights
+uint16 max_steps
+uint16 nr_episodes
+uint16 nr_runs
+string acquisition_function
+---
+float32[] best_policy
+float32[] reward_mean
+float32[] reward_std
\ No newline at end of file
diff --git a/src/active_bo_ros/active_bo_ros/BayesianOptimization/BayesianOptimization.py b/src/active_bo_ros/active_bo_ros/BayesianOptimization/BayesianOptimization.py
index 767374a..6e3077d 100644
--- a/src/active_bo_ros/active_bo_ros/BayesianOptimization/BayesianOptimization.py
+++ b/src/active_bo_ros/active_bo_ros/BayesianOptimization/BayesianOptimization.py
@@ -7,9 +7,6 @@ from active_bo_ros.AcquistionFunctions.ExpectedImprovement import ExpectedImprov
 from active_bo_ros.AcquistionFunctions.ProbabilityOfImprovement import ProbabilityOfImprovement
 from active_bo_ros.AcquistionFunctions.ConfidenceBound import ConfidenceBound
 
-from active_bo_ros.ReinforcementLearning.ContinuousMountainCar import Continuous_MountainCarEnv
-
-
 class BayesianOptimization:
     def __init__(self, env, nr_steps, nr_init=3, acq='ei', nr_weights=6, policy_seed=None):
         self.env = env
@@ -145,5 +142,14 @@ class BayesianOptimization:
         self.episode += 1
         return step_count
 
+    def get_best_result(self):
+        y_hat = self.GP.predict(self.X)
+        idx = np.argmax(y_hat)
+        x_max = self.X[idx, :]
+
+        self.policy_model.weights = x_max
+
+        return self.policy_model.rollout(), y_hat[idx]
+
 
 
diff --git a/src/active_bo_ros/active_bo_ros/bo_service.py b/src/active_bo_ros/active_bo_ros/bo_service.py
index e69de29..1f490bc 100644
--- a/src/active_bo_ros/active_bo_ros/bo_service.py
+++ b/src/active_bo_ros/active_bo_ros/bo_service.py
@@ -0,0 +1,64 @@
+from active_bo_msgs.srv import BO
+
+import rclpy
+from rclpy.node import Node
+
+from active_bo_ros.BayesianOptimization.BayesianOptimization import BayesianOptimization
+from active_bo_ros.ReinforcementLearning.ContinuousMountainCar import Continuous_MountainCarEnv
+
+import numpy as np
+
+class BOService(Node):
+    def __init__(self):
+        super().__init__('bo_service')
+        self.srv = self.create_service(BO, 'bo_srv', self.bo_callback)
+
+        self.env = Continuous_MountainCarEnv()
+        self.distance_penalty = 0
+
+        self.nr_init = 3
+
+    def bo_callback(self, request, response):
+        nr_weights = request.nr_weights
+        max_steps = request.max_steps
+        nr_episodes = request.nr_episodes
+        nr_runs = request.nr_runs
+        acq = request.acquisition_function
+
+        reward = np.zeros((nr_episodes, nr_runs))
+        best_pol_reward = np.zeros((nr_runs, 1))
+        best_policy = np.zeros((max_steps, nr_runs))
+
+        BO = BayesianOptimization(self.env,
+                                  max_steps,
+                                  nr_init=self.nr_init,
+                                  acq=acq,
+                                  nr_weights=nr_weights)
+
+        for i in range(nr_runs):
+            BO.initialize()
+
+            for j in range(nr_episodes):
+                x_next = BO.next_observation()
+                BO.eval_new_observation(x_next)
+
+            best_policy[:, i], best_pol_reward[i] = BO.get_best_result()
+            reward[:, i] = BO.best_reward.T
+
+        response.reward_mean = np.mean(reward, axis=1)
+        response.reward_std = np.std(reward, axis=1)
+
+        best_policy_idx = np.argmax(best_pol_reward)
+        response.best_policy = best_policy[:, best_policy_idx]
+        return response
+
+
+def main(args=None):
+    rclpy.init(args=args)
+
+    bo_service = BOService()
+
+    rclpy.spin(bo_service)
+
+if __name__ == '__main__':
+    main()
diff --git a/src/active_bo_ros/launch/bo_service.launch.py b/src/active_bo_ros/launch/bo_service.launch.py
old mode 100644
new mode 100755
index e69de29..6477bc3
--- a/src/active_bo_ros/launch/bo_service.launch.py
+++ b/src/active_bo_ros/launch/bo_service.launch.py
@@ -0,0 +1,11 @@
+from launch import LaunchDescription
+from launch_ros.actions import Node
+
+def generate_launch_description():
+    return LaunchDescription([
+        Node(
+            package='active_bo_ros',
+            executable='bo_srv',
+            name='bo_srv'
+        ),
+    ])
\ No newline at end of file
diff --git a/src/active_bo_ros/setup.py b/src/active_bo_ros/setup.py
index 341d0f6..be8e6d3 100644
--- a/src/active_bo_ros/setup.py
+++ b/src/active_bo_ros/setup.py
@@ -8,15 +8,17 @@ setup(
     name=package_name,
     version='0.0.0',
     packages=[package_name,
-              package_name+'/PolicyModel',
-              package_name+'/ReinforcementLearning'],
+              package_name + '/PolicyModel',
+              package_name + '/ReinforcementLearning',
+              package_name + '/AcquisitionFunctions',
+              package_name + '/BayesianOptimization'],
     data_files=[
         ('share/ament_index/resource_index/packages',
             ['resource/' + package_name]),
         ('share/' + package_name, ['package.xml']),
         (os.path.join('share', package_name), glob('launch/*.launch.py')),
     ],
-    install_requires=['setuptools', 'gym', 'numpy'],
+    install_requires=['setuptools', 'gym', 'numpy', 'scikit-learn'],
     zip_safe=True,
     maintainer='cpsfeith',
     maintainer_email='nikolaus.feith@unileoben.ac.at',
@@ -26,7 +28,8 @@
     entry_points={
         'console_scripts': [
             'policy_srv = active_bo_ros.policy_service:main',
-            'rl_srv = active_bo_ros.rl_service:main'
+            'rl_srv = active_bo_ros.rl_service:main',
+            'bo_srv = active_bo_ros.bo_service:main'
         ],
     },
 )
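
Note (not part of the patch): a minimal client sketch for exercising the new BO.srv interface and the 'bo_srv' service added above. The client node name, the request values, and the result logging are illustrative assumptions, not code from this repository; 'ei' mirrors the acq default of BayesianOptimization.

from active_bo_msgs.srv import BO

import rclpy
from rclpy.node import Node


class BOClient(Node):
    # Hypothetical client node, shown only to illustrate the service interface.
    def __init__(self):
        super().__init__('bo_client')
        self.client = self.create_client(BO, 'bo_srv')

    def send_request(self):
        # Block until the bo_srv service from bo_service.py is available.
        while not self.client.wait_for_service(timeout_sec=1.0):
            self.get_logger().info('bo_srv not available, waiting...')

        # Example request values; any other settings matching the uint16/string
        # fields of BO.srv would work the same way.
        request = BO.Request()
        request.nr_weights = 6
        request.max_steps = 100
        request.nr_episodes = 20
        request.nr_runs = 5
        request.acquisition_function = 'ei'

        return self.client.call_async(request)


def main(args=None):
    rclpy.init(args=args)
    client = BOClient()
    future = client.send_request()
    rclpy.spin_until_future_complete(client, future)

    response = future.result()
    client.get_logger().info(f'best policy has {len(response.best_policy)} steps')
    client.get_logger().info(f'mean reward per episode: {list(response.reward_mean)}')

    client.destroy_node()
    rclpy.shutdown()


if __name__ == '__main__':
    main()

The service itself can be started beforehand with the launch file added in this patch, e.g. ros2 launch active_bo_ros bo_service.launch.py.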