diff --git a/src/active_bo_msgs/CMakeLists.txt b/src/active_bo_msgs/CMakeLists.txt
index affd789..a80abf8 100644
--- a/src/active_bo_msgs/CMakeLists.txt
+++ b/src/active_bo_msgs/CMakeLists.txt
@@ -21,7 +21,8 @@ find_package(rosidl_default_generators REQUIRED)
 
 rosidl_generate_interfaces(${PROJECT_NAME}
   "srv/WeightToPolicy.srv"
-  "action/RLRollOut.action"
+  "srv/RLRollOut.srv"
+  "msg/ImageFeedback.msg"
 )
 
 if(BUILD_TESTING)
diff --git a/src/active_bo_msgs/msg/ImageFeedback.msg b/src/active_bo_msgs/msg/ImageFeedback.msg
new file mode 100644
index 0000000..1c6a82a
--- /dev/null
+++ b/src/active_bo_msgs/msg/ImageFeedback.msg
@@ -0,0 +1,5 @@
+int16 height
+int16 width
+uint8[] red
+uint8[] green
+uint8[] blue
\ No newline at end of file
diff --git a/src/active_bo_msgs/srv/RLRollOut.srv b/src/active_bo_msgs/srv/RLRollOut.srv
new file mode 100644
index 0000000..00555f7
--- /dev/null
+++ b/src/active_bo_msgs/srv/RLRollOut.srv
@@ -0,0 +1,4 @@
+float32[] policy
+---
+float32 reward
+uint16 final_step
\ No newline at end of file
diff --git a/src/active_bo_ros/active_bo_ros/ReinforcementLearning/ContinuousMountainCar.py b/src/active_bo_ros/active_bo_ros/ReinforcementLearning/ContinuousMountainCar.py
index a2c31fb..cf6e755 100644
--- a/src/active_bo_ros/active_bo_ros/ReinforcementLearning/ContinuousMountainCar.py
+++ b/src/active_bo_ros/active_bo_ros/ReinforcementLearning/ContinuousMountainCar.py
@@ -139,11 +139,11 @@ class Continuous_MountainCarEnv(gym.Env):
             low=self.low_state, high=self.high_state, dtype=np.float32
         )
 
-    def step(self, action: np.ndarray):
+    def step(self, action: float):
 
         position = self.state[0]
         velocity = self.state[1]
-        force = min(max(action[0], self.min_action), self.max_action)
+        force = min(max(action, self.min_action), self.max_action)
 
         velocity += force * self.power - 0.0025 * math.cos(3 * position)
         if velocity > self.max_speed:
@@ -166,7 +166,7 @@ class Continuous_MountainCarEnv(gym.Env):
         reward = 0
         if terminated:
             reward += 10
-        reward -= math.pow(action[0], 2) * 0.1
+        reward -= math.pow(action, 2) * 0.1
         reward -= 1
 
         self.state = np.array([position, velocity], dtype=np.float32)
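With this change `step()` accepts a plain scalar, so callers can feed policy entries directly instead of wrapping each one in an array. A minimal smoke-test sketch of the new calling convention, assuming the modified environment returns a tuple laid out as `(state, reward, terminated, ...)` with the rendered RGB frame at index 5 (the indices the service node below relies on); the 50-step zero policy is a placeholder:

```python
import numpy as np

from active_bo_ros.ReinforcementLearning.ContinuousMountainCar import Continuous_MountainCarEnv

env = Continuous_MountainCarEnv(render_mode='rgb_array')
env.reset()

policy = np.zeros(50, dtype=np.float32)  # placeholder open-loop policy

total_reward = 0.0
for action in policy:
    output = env.step(float(action))  # scalar action, no action[0] indexing
    total_reward += output[1]         # accumulated reward
    if output[2]:                     # terminated flag
        break
```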
diff --git a/src/active_bo_ros/active_bo_ros/rl_action_server.py b/src/active_bo_ros/active_bo_ros/rl_service.py
similarity index 55%
rename from src/active_bo_ros/active_bo_ros/rl_action_server.py
rename to src/active_bo_ros/active_bo_ros/rl_service.py
index 3cbebaa..6f95527 100644
--- a/src/active_bo_ros/active_bo_ros/rl_action_server.py
+++ b/src/active_bo_ros/active_bo_ros/rl_service.py
@@ -1,32 +1,33 @@
-from active_bo_msgs.action import RLRollOut
+from active_bo_msgs.srv import RLRollOut
+from active_bo_msgs.msg import ImageFeedback
 
 import rclpy
 from rclpy.node import Node
-from rclpy.action import ActionServer
 
 from active_bo_ros.ReinforcementLearning.ContinuousMountainCar import Continuous_MountainCarEnv
 
 import numpy as np
 import time
 
-class RLActionServer(Node):
+class RLService(Node):
     def __init__(self):
-        super().__init__('rl_action')
-        self.action_server = ActionServer(self,
-                                          RLRollOut,
-                                          'reinforcement_learning_action',
-                                          self.execute_callback)
+        super().__init__('rl_service')
+        self.srv = self.create_service(RLRollOut, 'rl_srv', self.rl_callback)
+
+        self.publisher = self.create_publisher(ImageFeedback, 'rl_feedback', 1)
 
         self.env = Continuous_MountainCarEnv(render_mode='rgb_array')
         self.distance_penalty = 0
 
-    def execute_callback(self, action_handle):
+    def rl_callback(self, request, response):
 
-        feedback_msg = RLRollOut.Feedback()
+        feedback_msg = ImageFeedback()
 
         reward = 0
         step_count = 0
-        policy = action_handle.request.policy
+        policy = request.policy
+
+        self.env.reset()
 
         for i in range(len(policy)):
             action = policy[i]
@@ -42,7 +43,7 @@ class RLActionServer(Node):
             feedback_msg.height = rgb_shape[0]
             feedback_msg.width = rgb_shape[1]
 
-            action_handle.publish_feedback(feedback_msg)
+            self.publisher.publish(feedback_msg)
 
             if done:
                 break
@@ -52,20 +53,17 @@ class RLActionServer(Node):
 
             time.sleep(0.01)
 
-        action_handle.succeed()
+        response.reward = reward
+        response.final_step = step_count
 
-        result = RLRollOut.Result()
-        result.reward = reward
-        result.final_step = step_count
-
-        return result
+        return response
 
 def main(args=None):
     rclpy.init(args=args)
 
-    rl_action_server = RLActionServer()
+    rl_service = RLService()
 
-    rclpy.spin(rl_action_server)
+    rclpy.spin(rl_service)
 
 if __name__ == '__main__':
     main()
diff --git a/src/active_bo_ros/launch/rl_action.launch.py b/src/active_bo_ros/dump/rl_action.launch.py
similarity index 100%
rename from src/active_bo_ros/launch/rl_action.launch.py
rename to src/active_bo_ros/dump/rl_action.launch.py
diff --git a/src/active_bo_ros/dump/rl_action_server.py b/src/active_bo_ros/dump/rl_action_server.py
new file mode 100644
index 0000000..1077204
--- /dev/null
+++ b/src/active_bo_ros/dump/rl_action_server.py
@@ -0,0 +1,67 @@
+from active_bo_msgs.srv import RLRollOut
+from active_bo_msgs.msg import ImageFeedback
+
+import rclpy
+from rclpy.node import Node
+
+from active_bo_ros.ReinforcementLearning.ContinuousMountainCar import Continuous_MountainCarEnv
+
+import numpy as np
+import time
+
+class RLService(Node):
+    def __init__(self):
+        super().__init__('rl_service')
+        self.srv = self.create_service(RLRollOut, 'rl_srv', self.rl_callback)
+
+        self.publisher = self.create_publisher(ImageFeedback, 'rl_feedback', 1)
+
+        self.env = Continuous_MountainCarEnv(render_mode='rgb_array')
+        self.distance_penalty = 0
+
+    def rl_callback(self, request, response):
+
+        feedback_msg = ImageFeedback()
+
+        reward = 0
+        step_count = 0
+        policy = request.policy
+
+        for i in range(len(policy)):
+            action = policy[i]
+            output = self.env.step(action)
+
+            reward += output[1]
+            done = output[2]
+            step_count += 1
+
+            rgb_array = output[5]
+            rgb_shape = rgb_array.shape
+
+            feedback_msg.height = rgb_shape[0]
+            feedback_msg.width = rgb_shape[1]
+
+            self.publisher.publish(feedback_msg)
+
+            if done:
+                break
+
+            distance = -(self.env.goal_position - output[0][0])
+            reward += distance * self.distance_penalty
+
+            time.sleep(0.01)
+
+        response.reward = reward
+        response.final_step = step_count
+
+        return response
+
+def main(args=None):
+    rclpy.init(args=args)
+
+    rl_service = RLService()
+
+    rclpy.spin(rl_service)
+
+if __name__ == '__main__':
+    main()
diff --git a/src/active_bo_ros/launch/rl_service.launch.py b/src/active_bo_ros/launch/rl_service.launch.py
new file mode 100755
index 0000000..aedc7a9
--- /dev/null
+++ b/src/active_bo_ros/launch/rl_service.launch.py
@@ -0,0 +1,11 @@
+from launch import LaunchDescription
+from launch_ros.actions import Node
+
+def generate_launch_description():
+    return LaunchDescription([
+        Node(
+            package='active_bo_ros',
+            executable='rl_srv',
+            name='rl_service'
+        ),
+    ])
\ No newline at end of file
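On the caller side, the switch from an action to a service means rollouts are now requested through a standard rclpy service client. A minimal client sketch, assuming the service name from the diff (`rl_srv`); the node name `rl_client` and the 50-step zero policy are placeholders:

```python
from active_bo_msgs.srv import RLRollOut

import rclpy
from rclpy.node import Node


class RLClient(Node):
    def __init__(self):
        super().__init__('rl_client')
        self.client = self.create_client(RLRollOut, 'rl_srv')
        # Block until the service node is up.
        while not self.client.wait_for_service(timeout_sec=1.0):
            self.get_logger().info('rl_srv not available, waiting...')

    def send_policy(self, policy):
        request = RLRollOut.Request()
        request.policy = [float(p) for p in policy]  # float32[] field
        future = self.client.call_async(request)
        rclpy.spin_until_future_complete(self, future)
        return future.result()


def main(args=None):
    rclpy.init(args=args)
    client = RLClient()
    result = client.send_policy([0.0] * 50)  # placeholder zero policy
    client.get_logger().info(f'reward: {result.reward}, final_step: {result.final_step}')


if __name__ == '__main__':
    main()
```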
diff --git a/src/active_bo_ros/setup.py b/src/active_bo_ros/setup.py
index a15bcc3..341d0f6 100644
--- a/src/active_bo_ros/setup.py
+++ b/src/active_bo_ros/setup.py
@@ -16,7 +16,7 @@ setup(
         ('share/' + package_name, ['package.xml']),
         (os.path.join('share', package_name), glob('launch/*.launch.py')),
     ],
-    install_requires=['setuptools'],
+    install_requires=['setuptools', 'gym', 'numpy'],
     zip_safe=True,
     maintainer='cpsfeith',
     maintainer_email='nikolaus.feith@unileoben.ac.at',
@@ -26,7 +26,7 @@ setup(
     entry_points={
         'console_scripts': [
             'policy_srv = active_bo_ros.policy_service:main',
-            'rl_action = active_bo_ros.rl_action_server:main'
+            'rl_srv = active_bo_ros.rl_service:main'
         ],
     },
 )
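`ImageFeedback` ships the rendered frame as three flat `uint8[]` channel planes, although `rl_callback` currently fills only `height` and `width`. A sketch of how the planes might be packed on the publisher side and rebuilt by a subscriber, assuming row-major (C-order) flattening of an `(H, W, 3)` frame; both helpers are hypothetical, not part of the diff:

```python
import numpy as np

from active_bo_msgs.msg import ImageFeedback


def encode(rgb_array: np.ndarray) -> ImageFeedback:
    # Split an (H, W, 3) uint8 frame into three flat channel planes.
    msg = ImageFeedback()
    msg.height = rgb_array.shape[0]
    msg.width = rgb_array.shape[1]
    msg.red = rgb_array[:, :, 0].flatten().tolist()
    msg.green = rgb_array[:, :, 1].flatten().tolist()
    msg.blue = rgb_array[:, :, 2].flatten().tolist()
    return msg


def decode(msg: ImageFeedback) -> np.ndarray:
    # Rebuild the (H, W, 3) frame from the flat channel planes.
    shape = (msg.height, msg.width)
    red = np.array(msg.red, dtype=np.uint8).reshape(shape)
    green = np.array(msg.green, dtype=np.uint8).reshape(shape)
    blue = np.array(msg.blue, dtype=np.uint8).reshape(shape)
    return np.stack([red, green, blue], axis=-1)
```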