diff --git a/src/active_bo_msgs/CMakeLists.txt b/src/active_bo_msgs/CMakeLists.txt
index 1e5e951..e256c6a 100644
--- a/src/active_bo_msgs/CMakeLists.txt
+++ b/src/active_bo_msgs/CMakeLists.txt
@@ -24,6 +24,8 @@ rosidl_generate_interfaces(${PROJECT_NAME}
   "srv/RLRollOut.srv"
   "srv/BO.srv"
   "srv/ActiveBO.srv"
+  "srv/ActiveRL.srv"
+  "srv/ActiveRLEval.srv"
   "msg/ImageFeedback.msg"
 )
diff --git a/src/active_bo_msgs/srv/ActiveBO.srv b/src/active_bo_msgs/srv/ActiveBO.srv
index c76746b..ff158f6 100644
--- a/src/active_bo_msgs/srv/ActiveBO.srv
+++ b/src/active_bo_msgs/srv/ActiveBO.srv
@@ -3,7 +3,7 @@ uint16 max_steps
 uint16 nr_episodes
 uint16 nr_runs
 string acquisition_function
-uint16 epsilon
+float32 epsilon
 ---
 float32[] best_policy
 float32[] best_weights
diff --git a/src/active_bo_msgs/srv/ActiveRL.srv b/src/active_bo_msgs/srv/ActiveRL.srv
new file mode 100644
index 0000000..8467ae0
--- /dev/null
+++ b/src/active_bo_msgs/srv/ActiveRL.srv
@@ -0,0 +1,6 @@
+float32[] old_policy
+float32[] old_weights
+---
+float32[] new_weights
+uint16 final_step
+float32 reward
\ No newline at end of file
diff --git a/src/active_bo_msgs/srv/ActiveRLEval.srv b/src/active_bo_msgs/srv/ActiveRLEval.srv
new file mode 100644
index 0000000..fe89448
--- /dev/null
+++ b/src/active_bo_msgs/srv/ActiveRLEval.srv
@@ -0,0 +1,5 @@
+float32[] old_policy
+float32[] old_weights
+---
+float32[] new_policy
+float32[] new_weights
\ No newline at end of file
diff --git a/src/active_bo_ros/active_bo_ros/BayesianOptimization/BayesianOptimization.py b/src/active_bo_ros/active_bo_ros/BayesianOptimization/BayesianOptimization.py
index 890ead1..c4b2f6b 100644
--- a/src/active_bo_ros/active_bo_ros/BayesianOptimization/BayesianOptimization.py
+++ b/src/active_bo_ros/active_bo_ros/BayesianOptimization/BayesianOptimization.py
@@ -145,6 +145,17 @@ class BayesianOptimization:
         self.episode += 1
         return step_count
 
+    def add_new_observation(self, reward, x_new):
+        self.X = np.vstack((self.X, x_new))
+        self.Y = np.vstack((self.Y, reward))
+
+        if self.episode == 0:
+            self.best_reward[0] = max(self.Y)
+        else:
+            self.best_reward = np.vstack((self.best_reward, max(self.Y)))
+
+        self.episode += 1
+
     def get_best_result(self):
         y_hat = self.GP.predict(self.X)
         idx = np.argmax(y_hat)
diff --git a/src/active_bo_ros/active_bo_ros/ReinforcementLearning/ContinuousMountainCar.py b/src/active_bo_ros/active_bo_ros/ReinforcementLearning/ContinuousMountainCar.py
index 5b0e91a..c37e166 100644
--- a/src/active_bo_ros/active_bo_ros/ReinforcementLearning/ContinuousMountainCar.py
+++ b/src/active_bo_ros/active_bo_ros/ReinforcementLearning/ContinuousMountainCar.py
@@ -102,7 +102,7 @@ class Continuous_MountainCarEnv(gym.Env):
 
     metadata = {
         "render_modes": ["human", "rgb_array"],
-        "render_fps": 30,
+        "render_fps": 60,
     }
 
     def __init__(self, render_mode: Optional[str] = None, goal_velocity=0):
diff --git a/src/active_bo_ros/active_bo_ros/active_bo_service.py b/src/active_bo_ros/active_bo_ros/active_bo_service.py
index f8e574b..b37a159 100644
--- a/src/active_bo_ros/active_bo_ros/active_bo_service.py
+++ b/src/active_bo_ros/active_bo_ros/active_bo_service.py
@@ -1,4 +1,5 @@
 from active_bo_msgs.srv import ActiveBO
+from active_bo_msgs.srv import ActiveRL
 
 import rclpy
 from rclpy.node import Node
@@ -11,9 +12,11 @@ import numpy as np
 
 class ActiveBOService(Node):
     def __init__(self):
-        super().__init__('active_bo_servie')
+        super().__init__('active_bo_service')
         self.srv = self.create_service(ActiveBO, 'active_bo_srv', self.active_bo_callback)
+        self.active_rl_client = self.create_client(ActiveRL, 'active_rl_srv')
+
         self.env = Continuous_MountainCarEnv()
         self.distance_penalty = 0
 
@@ -38,13 +41,23 @@ class ActiveBOService(Node):
                                   nr_init=self.nr_init,
                                   acq=acq,
                                   nr_weights=nr_weights)
+
+        arl_request = ActiveRL.Request()
 
         for i in range(nr_runs):
             BO.initialize()
             for j in range(nr_episodes):
                 # active part
-                if np.random.uniform(0.0, 1.0, 1) < epsilon:
-                    pass
+                if (j > 3) and (np.random.uniform(0.0, 1.0, 1) < epsilon):
+                    self.get_logger().info('Active User Input')
+                    old_policy, _, old_weights = BO.get_best_result()
+
+                    arl_request.old_policy = old_policy.tolist()
+                    arl_request.old_weights = old_weights.tolist()
+                    arl_response = self.active_rl_client.call(arl_request)
+
+                    BO.add_new_observation(arl_response.reward, arl_response.new_weights)
+
                 # BO part
                 else:
                     x_next = BO.next_observation()
diff --git a/src/active_bo_ros/active_bo_ros/active_rl_service.py b/src/active_bo_ros/active_bo_ros/active_rl_service.py
new file mode 100644
index 0000000..e07830b
--- /dev/null
+++ b/src/active_bo_ros/active_bo_ros/active_rl_service.py
@@ -0,0 +1,116 @@
+from active_bo_msgs.srv import ActiveRL
+from active_bo_msgs.srv import ActiveRLEval
+from active_bo_msgs.msg import ImageFeedback
+
+import rclpy
+from rclpy.node import Node
+
+from active_bo_ros.ReinforcementLearning.ContinuousMountainCar import Continuous_MountainCarEnv
+
+import numpy as np
+
+
+class ActiveRLService(Node):
+    def __init__(self):
+        super().__init__('active_rl_service')
+        self.srv = self.create_service(ActiveRL, 'active_rl_srv', self.active_rl_callback)
+        self.eval_srv = self.create_client(ActiveRLEval, 'active_rl_eval_srv')
+
+        self.publisher = self.create_publisher(ImageFeedback, 'rl_feedback', 1)
+
+        self.env = Continuous_MountainCarEnv(render_mode='rgb_array')
+        self.distance_penalty = 0
+
+    def active_rl_callback(self, request, response):
+
+        feedback_msg = ImageFeedback()
+
+        reward = 0
+        step_count = 0
+        old_policy = request.old_policy
+        old_weights = request.old_weights
+
+        eval_request = ActiveRLEval.Request()
+        eval_request.old_policy = old_policy
+        eval_request.old_weights = old_weights
+
+        self.env.reset()
+
+        self.get_logger().info('Best policy so far!')
+
+        for i in range(len(old_policy)):
+            action = old_policy[i]
+            output = self.env.step(action)
+
+            done = output[2]
+
+            rgb_array = self.env.render()
+            rgb_shape = rgb_array.shape
+
+            red = rgb_array[:, :, 0].flatten().tolist()
+            green = rgb_array[:, :, 1].flatten().tolist()
+            blue = rgb_array[:, :, 2].flatten().tolist()
+
+            feedback_msg.height = rgb_shape[0]
+            feedback_msg.width = rgb_shape[1]
+            feedback_msg.red = red
+            feedback_msg.green = green
+            feedback_msg.blue = blue
+
+            self.publisher.publish(feedback_msg)
+
+            if done:
+                break
+
+        self.get_logger().info('Enter new solution!')
+        eval_response = self.eval_srv.call(eval_request)
+
+        new_policy = eval_response.new_policy.tolist()
+
+        for i in range(len(new_policy)):
+            action = new_policy[i]
+            output = self.env.step(action)
+
+            reward += output[1]
+            done = output[2]
+            step_count += 1
+
+            rgb_array = self.env.render()
+            rgb_shape = rgb_array.shape
+
+            red = rgb_array[:, :, 0].flatten().tolist()
+            green = rgb_array[:, :, 1].flatten().tolist()
+            blue = rgb_array[:, :, 2].flatten().tolist()
+
+            feedback_msg.height = rgb_shape[0]
+            feedback_msg.width = rgb_shape[1]
+            feedback_msg.red = red
+            feedback_msg.green = green
+            feedback_msg.blue = blue
+
+            self.publisher.publish(feedback_msg)
+
+            if done:
+                break
+
+        if not done and i == len(new_policy) - 1:
+            distance = -(self.env.goal_position - output[0][0])
+            reward += distance * self.distance_penalty
+
+        response.new_weights = eval_response.new_weights
+        response.reward = reward
+        response.final_step = step_count
+
+        return response
+
+
+def main(args=None):
+    rclpy.init(args=args)
+
+    active_rl_service = ActiveRLService()
+
+    rclpy.spin(active_rl_service)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/src/active_bo_ros/active_bo_ros/policy_service.py b/src/active_bo_ros/active_bo_ros/policy_service.py
index a499012..d585bc2 100644
--- a/src/active_bo_ros/active_bo_ros/policy_service.py
+++ b/src/active_bo_ros/active_bo_ros/policy_service.py
@@ -6,6 +6,7 @@ from rclpy.node import Node
 from active_bo_ros.PolicyModel.GaussianRBFModel import GaussianRBF
 import numpy as np
 
+
 class PolicyService(Node):
     def __init__(self):
         super().__init__('policy_service')
diff --git a/src/active_bo_ros/dump/rl_action.launch.py b/src/active_bo_ros/dump/rl_action.launch.py
deleted file mode 100755
index 67b95ee..0000000
--- a/src/active_bo_ros/dump/rl_action.launch.py
+++ /dev/null
@@ -1,11 +0,0 @@
-from launch import LaunchDescription
-from launch_ros.actions import Node
-
-def generate_launch_description():
-    return LaunchDescription([
-        Node(
-            package='active_bo_ros',
-            executable='rl_action',
-            name='rl_action'
-        ),
-    ])
\ No newline at end of file
diff --git a/src/active_bo_ros/dump/rl_action_server.py b/src/active_bo_ros/dump/rl_action_server.py
deleted file mode 100644
index 1077204..0000000
--- a/src/active_bo_ros/dump/rl_action_server.py
+++ /dev/null
@@ -1,67 +0,0 @@
-from active_bo_msgs.srv import RLRollOut
-from active_bo_msgs.msg import ImageFeedback
-
-import rclpy
-from rclpy.node import Node
-
-from active_bo_ros.ReinforcementLearning.ContinuousMountainCar import Continuous_MountainCarEnv
-
-import numpy as np
-import time
-
-class RLService(Node):
-    def __init__(self):
-        super().__init__('rl_service')
-        self.srv = self.create_service(RLRollOut, 'rl_srv', self.rl_callback)
-
-        self.publisher = self.create_publisher(ImageFeedback, 'rl_feedback', 1)
-
-        self.env = Continuous_MountainCarEnv(render_mode='rgb_array')
-        self.distance_penalty = 0
-
-    def rl_callback(self, request, response):
-
-        feedback_msg = ImageFeedback()
-
-        reward = 0
-        step_count = 0
-        policy = request.policy
-
-        for i in range(len(policy)):
-            action = policy[i]
-            output = self.env.step(action)
-
-            reward += output[1]
-            done = output[2]
-            step_count += 1
-
-            rgb_array = output[5]
-            rgb_shape = rgb_array.shape
-
-            feedback_msg.height = rgb_shape[0]
-            feedback_msg.width = rgb_shape[1]
-
-            self.publisher.publish(feedback_msg)
-
-            if done:
-                break
-
-        distance = -(self.env.goal_position - output[0][0])
-        reward += distance * self.distance_penalty
-
-        time.sleep(0.01)
-
-        response.reward = reward
-        response.final_step = step_count
-
-        return response
-
-def main(args=None):
-    rclpy.init(args=args)
-
-    rl_service = RLService()
-
-    rclpy.spin(rl_service)
-
-if __name__ == '__main__':
-    main()
diff --git a/src/active_bo_ros/launch/active_bo_service.launch.py b/src/active_bo_ros/launch/active_bo_service.launch.py
new file mode 100755
index 0000000..362ff51
--- /dev/null
+++ b/src/active_bo_ros/launch/active_bo_service.launch.py
@@ -0,0 +1,16 @@
+from launch import LaunchDescription
+from launch_ros.actions import Node
+
+def generate_launch_description():
+    return LaunchDescription([
+        Node(
+            package='active_bo_ros',
+            executable='active_bo_srv',
+            name='active_bo_service'
+        ),
+        Node(
+            package='active_bo_ros',
+            executable='active_rl_srv',
+            name='active_rl_service'
+        ),
+    ])
diff --git a/src/active_bo_ros/setup.py b/src/active_bo_ros/setup.py
index 98587c5..7b03d7f 100644
--- a/src/active_bo_ros/setup.py
+++ b/src/active_bo_ros/setup.py
@@ -29,7 +29,9 @@ setup(
         'console_scripts': [
            'policy_srv = active_bo_ros.policy_service:main',
            'rl_srv = active_bo_ros.rl_service:main',
-           'bo_srv = active_bo_ros.bo_service:main'
+           'bo_srv = active_bo_ros.bo_service:main',
+           'active_bo_srv = active_bo_ros.active_bo_service:main',
+           'active_rl_srv = active_bo_ros.active_rl_service:main',
        ],
    },
)
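
Note on the synchronous calls introduced above (self.active_rl_client.call(...) in active_bo_service.py and self.eval_srv.call(...) in active_rl_service.py): under rclpy's default single-threaded executor, a blocking call() issued from inside a service callback cannot complete, because the one executor thread that would have to process the nested response is already occupied by that callback. The sketch below is not part of the patch; it only illustrates one common workaround, reusing the node and service names from active_bo_service.py and eliding the callback body.

# Sketch only (assumption, not the patched implementation): run the node with a
# MultiThreadedExecutor and a shared ReentrantCallbackGroup so the nested
# ActiveRL call can be serviced while the ActiveBO callback is still running.
import rclpy
from rclpy.node import Node
from rclpy.executors import MultiThreadedExecutor
from rclpy.callback_groups import ReentrantCallbackGroup

from active_bo_msgs.srv import ActiveBO
from active_bo_msgs.srv import ActiveRL


class ActiveBOService(Node):
    def __init__(self):
        super().__init__('active_bo_service')
        # One reentrant group for both the served callback and the nested client,
        # so the executor may run them concurrently on different threads.
        cb_group = ReentrantCallbackGroup()
        self.srv = self.create_service(ActiveBO, 'active_bo_srv',
                                       self.active_bo_callback,
                                       callback_group=cb_group)
        self.active_rl_client = self.create_client(ActiveRL, 'active_rl_srv',
                                                   callback_group=cb_group)

    def active_bo_callback(self, request, response):
        # ... identical to the callback in the patch, including the blocking
        # self.active_rl_client.call(arl_request) ...
        return response


def main(args=None):
    rclpy.init(args=args)
    node = ActiveBOService()
    # Two threads: one can sit inside active_bo_callback while the other
    # delivers the active_rl_srv response to the waiting client.
    executor = MultiThreadedExecutor(num_threads=2)
    executor.add_node(node)
    executor.spin()


if __name__ == '__main__':
    main()

The same wiring would apply to ActiveRLService, whose callback blocks on the active_rl_eval_srv client in the same way.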