Rewrite Active_BO and Active_RL as Topic instead of Srv

Starting with testing
This commit is contained in:
Niko Feith 2023-04-04 15:21:08 +02:00
parent e70459dc6e
commit baa81ca4a7
8 changed files with 392 additions and 4 deletions

View File

@ -25,8 +25,10 @@ rosidl_generate_interfaces(${PROJECT_NAME}
"srv/BO.srv"
"srv/ActiveBO.srv"
"srv/ActiveRL.srv"
"srv/ActiveRLEval.srv"
"msg/ActiveRLEval.msg"
"msg/ActiveBORequest.msg"
"msg/ActiveBOResponse.msg"
"msg/ActiveRLResponse.msg"
"msg/ActiveRL.msg"
"msg/ImageFeedback.msg"
)

View File

@ -0,0 +1,6 @@
uint16 nr_weights
uint16 max_steps
uint16 nr_episodes
uint16 nr_runs
string acquisition_function
float32 epsilon

View File

@ -0,0 +1,4 @@
float32[] best_policy
float32[] best_weights
float32[] reward_mean
float32[] reward_std

View File

@ -0,0 +1,3 @@
float32[] weights
uint16 final_step
float32 reward

View File

@ -0,0 +1,194 @@
from active_bo_msgs.msg import ActiveBORequest
from active_bo_msgs.msg import ActiveBOResponse
from active_bo_msgs.msg import ActiveRL
from active_bo_msgs.msg import ActiveRLResponse
import rclpy
from rclpy.node import Node
from rclpy.callback_groups import ReentrantCallbackGroup
from active_bo_ros.BayesianOptimization.BayesianOptimization import BayesianOptimization
from active_bo_ros.ReinforcementLearning.ContinuousMountainCar import Continuous_MountainCarEnv
import numpy as np
import time
class ActiveBOTopic(Node):
def __init__(self):
super().__init__('active_bo_topic')
bo_callback_group = ReentrantCallbackGroup()
rl_callback_group = ReentrantCallbackGroup()
mainloop_callback_group = ReentrantCallbackGroup()
# Active Bayesian Optimization Publisher, Subscriber and Message attributes
self.active_bo_pub = self.create_publisher(ActiveBOResponse,
'active_bo_response',
1, callback_group=bo_callback_group)
self.active_bo_sub = self.create_subscription(ActiveBORequest,
'active_bo_request',
self.active_bo_callback,
1, callback_group=bo_callback_group)
self.active_bo_pending = False
self.bo_nr_weights = None
self.bo_steps = None
self.bo_episodes = None
self.bo_runs = None
self.bo_acq_fcn = None
self.bo_epsilon = None
self.current_run = 0
self.current_episode = 0
# Active Reinforcement Learning Publisher, Subscriber and Message attributes
self.active_rl_pub = self.create_publisher(ActiveRL,
'active_rl_request',
1, callback_group=rl_callback_group)
self.active_rl_sub = self.create_subscription(ActiveRLResponse,
'active_rl_response',
self.active_rl_callback,
1, callback_group=rl_callback_group)
self.active_rl_pending = False
self.rl_weights = None
self.rl_final_step = None
self.rl_reward = None
# RL Environments and BO
self.env = Continuous_MountainCarEnv()
self.distance_penalty = 0
self.BO = None
self.nr_init = 3
self.reward = None
self.best_pol_reward = None
self.best_policy = None
self.best_weights = None
# Main loop timer object
self.mainloop_timer_period = 0.1
self.mainloop = self.create_timer(self.mainloop_timer_period,
self.mainloop_callback,
callback_group=mainloop_callback_group)
def reset_bo_request(self):
self.bo_nr_weights = None
self.bo_steps = None
self.bo_episodes = None
self.bo_runs = None
self.bo_acq_fcn = None
self.bo_epsilon = None
self.current_run = 0
self.current_episode = 0
def active_bo_callback(self, msg):
self.get_logger().info('Active Bayesian Optimization request pending!')
self.active_bo_pending = True
self.bo_nr_weights = msg.nr_weights
self.bo_steps = msg.max_steps
self.bo_episodes = msg.nr_episodes
self.bo_runs = msg.nr_runs
self.bo_acq_fcn = msg.acquisition_function
self.bo_epsilon = msg.epsilon
def reset_rl_response(self):
self.rl_weights = None
self.rl_final_step = None
self.rl_reward = None
def active_rl_callback(self, msg):
self.get_logger().info('Active Reinforcement Learning response pending!')
self.active_rl_pending = False
self.rl_weights = None
self.rl_final_step = None
self.rl_reward = None
def mainloop_callback(self):
if self.active_bo_pending:
if self.BO is None:
self.BO = BayesianOptimization(self.env,
self.bo_steps,
nr_init=self.nr_init,
acq=self.bo_acq_fcn,
nr_weights=self.bo_nr_weights)
self.reward = np.zeros((self.bo_episodes, self.bo_runs))
self.best_pol_reward = np.zeros((1, self.bo_runs))
self.best_policy = np.zeros((self.bo_steps, self.bo_runs))
self.best_weights = np.zeros((self.bo_nr_weights, self.bo_runs))
self.BO.initialize()
if self.current_run >= self.bo_runs:
bo_response = ActiveBOResponse()
best_policy_idx = np.argmax(self.best_pol_reward)
bo_response.best_policy = self.best_policy[:, best_policy_idx].tolist()
bo_response.best_weights = self.best_weights[:, best_policy_idx].tolist()
bo_response.reward_mean = np.mean(self.reward, axis=1).tolist()
bo_response.reward_std = np.std(self.reward, axis=1).tolist()
self.active_bo_pub.publish(bo_response)
self.reset_bo_request()
self.active_bo_pending = False
else:
if self.active_rl_pending:
pass
elif self.rl_weights is not None and not self.active_rl_pending:
try:
self.BO.add_new_observation(self.rl_reward, self.rl_weights)
self.reset_rl_response()
except Exception as e:
self.get_logger().error(f'Active Reinforcement Learning failed to add new observation: {e}')
else:
if self.current_episode < self.bo_episodes:
if np.random.uniform(0.0, 1.0, 1) < self.bo_epsilon:
active_rl_request = ActiveRL()
old_policy, _, old_weights = self.BO.get_best_result()
active_rl_request.policy = old_policy.tolist()
active_rl_request.weights = old_weights.tolist()
self.get_logger().info('Calling: Active RL')
self.active_rl_pub.publish(active_rl_request)
self.active_rl_pending = True
else:
x_next = self.BO.next_observation()
self.BO.eval_new_observation(x_next)
self.best_policy[:, self.current_run], \
self.best_pol_reward[:, self.current_run], \
self.best_weights[:, self.current_run] = self.BO.get_best_result()
self.reward[:, self.current_run] = self.BO.best_reward.T
self.current_episode += 1
else:
self.current_episode = 0
self.current_run += 1
def main(args=None):
rclpy.init(args=args)
active_bo_topic = ActiveBOTopic()
rclpy.spin(active_bo_topic)
try:
rclpy.spin(active_bo_topic)
except KeyboardInterrupt:
pass
active_bo_topic.destroy_node()
rclpy.shutdown()
if __name__ == '__main__':
main()

View File

@ -1,6 +1,6 @@
from active_bo_msgs.srv import ActiveRL
from active_bo_msgs.msg import ImageFeedback
from active_bo_msgs.msg import ActiveRLEval
from active_bo_msgs.msg import ActiveRL as ActiveRLEval
import rclpy
from rclpy.node import Node
@ -36,7 +36,7 @@ class ActiveRLService(Node):
self.active_rl_eval_callback,
1,
callback_group=topic_callback_group)
# active_rl_eval_response
self.eval_response_received = False
self.eval_response = None

View File

@ -0,0 +1,179 @@
from active_bo_msgs.msg import ActiveRL
from active_bo_msgs.msg import ActiveRLResponse
from active_bo_msgs.msg import ImageFeedback
import rclpy
from rclpy.node import Node
from rclpy.callback_groups import ReentrantCallbackGroup
from active_bo_ros.ReinforcementLearning.ContinuousMountainCar import Continuous_MountainCarEnv
import numpy as np
import time
import copy
class ActiveRLService(Node):
def __init__(self):
super().__init__('active_rl_service')
rl_callback_group = ReentrantCallbackGroup()
topic_callback_group = ReentrantCallbackGroup()
mainloop_callback_group = ReentrantCallbackGroup()
# Active Reinforcement Learning Publisher, Subscriber and Message attributes
self.active_rl_pub = self.create_publisher(ActiveRLResponse,
'active_rl_response',
1, callback_group=rl_callback_group)
self.active_rl_sub = self.create_subscription(ActiveRL,
'active_rl_request',
self.active_rl_callback,
1, callback_group=rl_callback_group)
self.active_rl_pending = False
self.rl_policy = None
self.rl_weights = None
self.rl_reward = 0.0
self.rl_step = 0
# Image publisher to publish the rgb array from the gym environment
self.image_pub = self.create_publisher(ImageFeedback,
'rl_feedback',
1, callback_group=topic_callback_group)
# Active RL Evaluation Publisher, Subscriber and Message attributes
self.eval_pub = self.create_publisher(ActiveRL,
'active_rl_eval_request',
1,
callback_group=topic_callback_group)
self.eval_sub = self.create_subscription(ActiveRL,
'active_rl_eval_response',
self.active_rl_eval_callback,
1,
callback_group=topic_callback_group)
self.eval_response_received = False
self.eval_policy = None
self.eval_weights = None
# RL Environments
self.env = Continuous_MountainCarEnv(render_mode='rgb_array')
self.distance_penalty = 0
self.best_pol_shown = False
# Main loop timer object
self.mainloop_timer_period = 0.1
self.mainloop = self.create_timer(self.mainloop_timer_period,
self.mainloop_callback,
callback_group=mainloop_callback_group)
def reset_rl_request(self):
self.rl_policy = None
self.rl_weights = None
def active_rl_callback(self, msg):
self.rl_policy = msg.policy
self.rl_weights = msg.weights
self.get_logger().info('Active RL: Called!')
self.active_rl_pending = True
def reset_eval_request(self):
self.eval_policy = None
self.eval_weights = None
def active_rl_eval_callback(self, msg):
self.eval_policy = msg.policy
self.eval_weights = msg.weights
self.get_logger().info('Active RL Eval: Responded!')
self.eval_response_received = True
def next_image(self, policy):
action = policy[self.rl_step]
output = self.env.step(action)
self.rl_reward += output[1]
done = output[2]
self.rl_step += 1
rgb_array = self.env.render()
rgb_shape = rgb_array.shape
red = rgb_array[:, :, 0].flatten().tolist()
green = rgb_array[:, :, 1].flatten().tolist()
blue = rgb_array[:, :, 2].flatten().tolist()
feedback_msg = ImageFeedback()
feedback_msg.height = rgb_shape[0]
feedback_msg.width = rgb_shape[1]
feedback_msg.red = red
feedback_msg.green = green
feedback_msg.blue = blue
self.image_pub.publish(feedback_msg)
if not done and self.rl_step == len(policy):
distance = -(self.env.goal_position - output[0][0])
self.rl_reward += distance * self.distance_penalty
return done
def mainloop_callback(self):
if self.active_rl_pending:
if not self.best_pol_shown:
done = self.next_image(self.rl_policy)
if done:
self.rl_step = 0
self.rl_reward = 0.0
eval_request = ActiveRL()
eval_request.policy = self.rl_policy
eval_request.weights = self.rl_weights
self.eval_pub.publish(eval_request)
self.get_logger().info('Active RL: Called!')
self.best_pol_shown = True
elif self.best_pol_shown:
if not self.eval_response_received:
self.get_logger().info('Active RL: Waiting for Eval!')
if self.eval_response_received:
done = self.next_image(self.eval_policy)
if done:
rl_response = ActiveRLResponse()
rl_response.weights = self.rl_weights
rl_response.reward = self.rl_reward
rl_response.final_step = self.rl_step
self.active_rl_pub.publish(rl_response)
# reset flags and attributes
self.reset_eval_request()
self.reset_rl_request()
self.rl_step = 0
self.rl_reward = 0.0
self.best_pol_shown = False
self.eval_response_received = False
self.active_rl_pending = False
def main(args=None):
rclpy.init(args=args)
active_rl_service = ActiveRLService()
rclpy.spin(active_rl_service)
rclpy.shutdown()
if __name__ == '__main__':
main()