implemented active bo

bug in webservice
Niko Feith 2023-03-20 17:47:34 +01:00
parent 6ed36c0b84
commit c27675d156
13 changed files with 178 additions and 84 deletions


@@ -24,6 +24,8 @@ rosidl_generate_interfaces(${PROJECT_NAME}
   "srv/RLRollOut.srv"
   "srv/BO.srv"
   "srv/ActiveBO.srv"
+  "srv/ActiveRL.srv"
+  "srv/ActiveRLEval.srv"
   "msg/ImageFeedback.msg"
 )


@@ -3,7 +3,7 @@ uint16 max_steps
 uint16 nr_episodes
 uint16 nr_runs
 string acquisition_function
-uint16 epsilon
+float32 epsilon
 ---
 float32[] best_policy
 float32[] best_weights


@@ -0,0 +1,6 @@
float32[] old_policy
float32[] old_weights
---
float32[] new_weights
uint16 final_step
float32 reward


@@ -0,0 +1,5 @@
float32[] old_policy
float32[] old_weights
---
float32[] new_policy
float32[] new_weights
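
The two new service types split the human-in-the-loop step: ActiveRL hands the incumbent policy and weights to the RL-side node and gets back the corrected weights plus the achieved reward and final step, while ActiveRLEval carries the user-edited policy back from the evaluation front end. A minimal sketch of a caller for ActiveRL, assuming only the 'active_rl_srv' service name used elsewhere in this commit (the node name and the call pattern are illustrative, not part of the commit):

# Sketch only: minimal rclpy client for the new ActiveRL service.
from active_bo_msgs.srv import ActiveRL

import rclpy
from rclpy.node import Node


class ActiveRLCaller(Node):  # illustrative node name
    def __init__(self):
        super().__init__('active_rl_caller')
        self.client = self.create_client(ActiveRL, 'active_rl_srv')

    def send(self, old_policy, old_weights):
        request = ActiveRL.Request()
        request.old_policy = [float(v) for v in old_policy]    # float32[] in the .srv
        request.old_weights = [float(v) for v in old_weights]
        future = self.client.call_async(request)
        rclpy.spin_until_future_complete(self, future)
        return future.result()   # carries new_weights, final_step, reward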


@@ -145,6 +145,17 @@ class BayesianOptimization:
         self.episode += 1
         return step_count
 
+    def add_new_observation(self, reward, x_new):
+        self.X = np.vstack((self.X, x_new))
+        self.Y = np.vstack((self.Y, reward))
+
+        if self.episode == 0:
+            self.best_reward[0] = max(self.Y)
+        else:
+            self.best_reward = np.vstack((self.best_reward, max(self.Y)))
+
+        self.episode += 1
+
     def get_best_result(self):
         y_hat = self.GP.predict(self.X)
         idx = np.argmax(y_hat)
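
add_new_observation is the hook that lets a rollout evaluated outside the acquisition loop (here: the user-corrected policy returned by the ActiveRL service) be appended to the GP training data and counted as an episode. A rough sketch of the intended call pattern, with BO and arl_response assumed to exist as in the service code further down:

# Sketch only: BO is a BayesianOptimization instance, arl_response an ActiveRL response.
import numpy as np

x_new = np.array(arl_response.new_weights, dtype=np.float32)   # weights the user ended up with
reward = arl_response.reward                                    # return of that rollout

BO.add_new_observation(reward, x_new)   # stacks onto X/Y and updates best_reward/episode
x_next = BO.next_observation()          # the regular BO step then uses the augmented data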


@@ -102,7 +102,7 @@ class Continuous_MountainCarEnv(gym.Env):
     metadata = {
         "render_modes": ["human", "rgb_array"],
-        "render_fps": 30,
+        "render_fps": 60,
     }
 
     def __init__(self, render_mode: Optional[str] = None, goal_velocity=0):


@@ -1,4 +1,5 @@
 from active_bo_msgs.srv import ActiveBO
+from active_bo_msgs.srv import ActiveRL
 
 import rclpy
 from rclpy.node import Node
@@ -11,9 +12,11 @@ import numpy as np
 
 class ActiveBOService(Node):
     def __init__(self):
-        super().__init__('active_bo_servie')
+        super().__init__('active_bo_service')
 
         self.srv = self.create_service(ActiveBO, 'active_bo_srv', self.active_bo_callback)
+        self.active_rl_client = self.create_client(ActiveRL, 'active_rl_srv')
 
         self.env = Continuous_MountainCarEnv()
         self.distance_penalty = 0
@@ -38,13 +41,23 @@ class ActiveBOService(Node):
                                       nr_init=self.nr_init,
                                       acq=acq,
                                       nr_weights=nr_weights)
 
+        arl_request = ActiveRL.Request()
+
         for i in range(nr_runs):
             BO.initialize()
 
             for j in range(nr_episodes):
                 # active part
-                if np.random.uniform(0.0, 1.0, 1) < epsilon:
-                    pass
+                if (j > 3) and (np.random.uniform(0.0, 1.0, 1) < epsilon):
+                    self.get_logger().info('Active User Input')
+                    old_policy, _, old_weights = BO.get_best_result()
+
+                    arl_request.old_policy = old_policy.tolist()
+                    arl_request.old_weights = old_weights.tolist()
+
+                    arl_response = self.active_rl_client.call(arl_request)
+
+                    BO.add_new_observation(arl_response.reward, arl_response.new_weights)
+
                 # BO part
                 else:
                     x_next = BO.next_observation()
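
The epsilon field of ActiveBO.srv changes from uint16 to float32 because it is now used as a probability: each episode after the first few (j > 3) is routed to the human with probability epsilon, otherwise a regular BO step is taken. The decision rule in isolation, with illustrative values:

# Sketch only: the selection rule used above, with made-up epsilon and j.
import numpy as np

epsilon = 0.25   # float32 in ActiveBO.srv, interpreted as a probability
j = 5            # current episode index

if (j > 3) and (np.random.uniform(0.0, 1.0) < epsilon):
    mode = 'active'   # query the user via the ActiveRL service
else:
    mode = 'bo'       # take a normal Bayesian optimization step

print(mode)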


@@ -0,0 +1,116 @@
from active_bo_msgs.srv import ActiveRL
from active_bo_msgs.srv import ActiveRLEval
from active_bo_msgs.msg import ImageFeedback

import rclpy
from rclpy.node import Node

from active_bo_ros.ReinforcementLearning.ContinuousMountainCar import Continuous_MountainCarEnv

import numpy as np


class ActiveRLService(Node):
    def __init__(self):
        super().__init__('active_rl_service')
        self.srv = self.create_service(ActiveRL, 'active_rl_srv', self.active_rl_callback)
        self.eval_srv = self.create_client(ActiveRLEval, 'active_rl_eval_srv')
        self.publisher = self.create_publisher(ImageFeedback, 'rl_feedback', 1)

        self.env = Continuous_MountainCarEnv(render_mode='rgb_array')
        self.distance_penalty = 0

    def active_rl_callback(self, request, response):
        feedback_msg = ImageFeedback()

        reward = 0
        step_count = 0

        old_policy = request.old_policy
        old_weights = request.old_weights

        eval_request = ActiveRLEval.Request()
        eval_request.old_policy = old_policy
        eval_request.old_weights = old_weights

        self.env.reset()

        # Replay the current best policy and stream the frames to the user.
        self.get_logger().info('Best policy so far!')

        for i in range(len(old_policy)):
            action = old_policy[i]
            output = self.env.step(action)

            done = output[2]

            rgb_array = self.env.render()
            rgb_shape = rgb_array.shape

            red = rgb_array[:, :, 0].flatten().tolist()
            green = rgb_array[:, :, 1].flatten().tolist()
            blue = rgb_array[:, :, 2].flatten().tolist()

            feedback_msg.height = rgb_shape[0]
            feedback_msg.width = rgb_shape[1]
            feedback_msg.red = red
            feedback_msg.green = green
            feedback_msg.blue = blue

            self.publisher.publish(feedback_msg)

            if done:
                break

        # Ask the evaluation front end for a user-corrected policy.
        self.get_logger().info('Enter new solution!')
        eval_response = self.eval_srv.call(eval_request)
        new_policy = eval_response.new_policy.tolist()

        # Roll out the corrected policy and accumulate its return.
        for i in range(len(new_policy)):
            action = new_policy[i]
            output = self.env.step(action)

            reward += output[1]
            done = output[2]
            step_count += 1

            rgb_array = self.env.render()
            rgb_shape = rgb_array.shape

            red = rgb_array[:, :, 0].flatten().tolist()
            green = rgb_array[:, :, 1].flatten().tolist()
            blue = rgb_array[:, :, 2].flatten().tolist()

            feedback_msg.height = rgb_shape[0]
            feedback_msg.width = rgb_shape[1]
            feedback_msg.red = red
            feedback_msg.green = green
            feedback_msg.blue = blue

            self.publisher.publish(feedback_msg)

            if done:
                break

        # Penalize the remaining distance to the goal if the episode did not finish.
        if not done and i == len(new_policy) - 1:
            distance = -(self.env.goal_position - output[0][0])
            reward += distance * self.distance_penalty

        response.new_weights = eval_response.new_weights
        response.reward = reward
        response.final_step = step_count

        return response


def main(args=None):
    rclpy.init(args=args)

    active_rl_service = ActiveRLService()

    rclpy.spin(active_rl_service)


if __name__ == '__main__':
    main()
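
The service streams each rendered frame as three flattened channel arrays plus the image dimensions. A subscriber can rebuild the frame by reshaping the channels; a minimal sketch follows (the node name is illustrative, and 8-bit channel values are assumed):

# Sketch only: reassemble an ImageFeedback message into an RGB frame.
from active_bo_msgs.msg import ImageFeedback

import numpy as np
import rclpy
from rclpy.node import Node


class FeedbackViewer(Node):  # illustrative node name
    def __init__(self):
        super().__init__('feedback_viewer')
        self.sub = self.create_subscription(ImageFeedback, 'rl_feedback', self.on_frame, 1)

    def on_frame(self, msg):
        shape = (msg.height, msg.width)
        # Assumes the channel arrays hold 8-bit values, as produced above.
        rgb = np.stack([np.asarray(msg.red, dtype=np.uint8).reshape(shape),
                        np.asarray(msg.green, dtype=np.uint8).reshape(shape),
                        np.asarray(msg.blue, dtype=np.uint8).reshape(shape)], axis=-1)
        self.get_logger().info(f'received frame of shape {rgb.shape}')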


@@ -6,6 +6,7 @@ from rclpy.node import Node
 from active_bo_ros.PolicyModel.GaussianRBFModel import GaussianRBF
 
 import numpy as np
 
 class PolicyService(Node):
     def __init__(self):
         super().__init__('policy_service')


@@ -1,11 +0,0 @@
from launch import LaunchDescription
from launch_ros.actions import Node


def generate_launch_description():
    return LaunchDescription([
        Node(
            package='active_bo_ros',
            executable='rl_action',
            name='rl_action'
        ),
    ])


@@ -1,67 +0,0 @@
from active_bo_msgs.srv import RLRollOut
from active_bo_msgs.msg import ImageFeedback

import rclpy
from rclpy.node import Node

from active_bo_ros.ReinforcementLearning.ContinuousMountainCar import Continuous_MountainCarEnv

import numpy as np
import time


class RLService(Node):
    def __init__(self):
        super().__init__('rl_service')
        self.srv = self.create_service(RLRollOut, 'rl_srv', self.rl_callback)
        self.publisher = self.create_publisher(ImageFeedback, 'rl_feedback', 1)

        self.env = Continuous_MountainCarEnv(render_mode='rgb_array')
        self.distance_penalty = 0

    def rl_callback(self, request, response):
        feedback_msg = ImageFeedback()

        reward = 0
        step_count = 0
        policy = request.policy

        for i in range(len(policy)):
            action = policy[i]
            output = self.env.step(action)

            reward += output[1]
            done = output[2]
            step_count += 1

            rgb_array = output[5]
            rgb_shape = rgb_array.shape

            feedback_msg.height = rgb_shape[0]
            feedback_msg.width = rgb_shape[1]

            self.publisher.publish(feedback_msg)

            if done:
                break

            distance = -(self.env.goal_position - output[0][0])
            reward += distance * self.distance_penalty

            time.sleep(0.01)

        response.reward = reward
        response.final_step = step_count

        return response


def main(args=None):
    rclpy.init(args=args)

    rl_service = RLService()

    rclpy.spin(rl_service)


if __name__ == '__main__':
    main()


@@ -0,0 +1,16 @@
from launch import LaunchDescription
from launch_ros.actions import Node


def generate_launch_description():
    return LaunchDescription([
        Node(
            package='active_bo_ros',
            executable='active_bo_srv',
            name='active_bo_service'
        ),
        Node(
            package='active_bo_ros',
            executable='active_rl_srv',
            name='active_rl_service'
        ),
    ])


@@ -29,7 +29,9 @@ setup(
         'console_scripts': [
             'policy_srv = active_bo_ros.policy_service:main',
             'rl_srv = active_bo_ros.rl_service:main',
-            'bo_srv = active_bo_ros.bo_service:main'
+            'bo_srv = active_bo_ros.bo_service:main',
+            'active_bo_srv = active_bo_ros.active_bo_service:main',
+            'active_rl_srv = active_bo_ros.active_rl_service:main',
         ],
     },
 )