implemented active bo
bug in webservice
This commit is contained in:
parent
6ed36c0b84
commit
c27675d156
@ -24,6 +24,8 @@ rosidl_generate_interfaces(${PROJECT_NAME}
|
|||||||
"srv/RLRollOut.srv"
|
"srv/RLRollOut.srv"
|
||||||
"srv/BO.srv"
|
"srv/BO.srv"
|
||||||
"srv/ActiveBO.srv"
|
"srv/ActiveBO.srv"
|
||||||
|
"srv/ActiveRL.srv"
|
||||||
|
"srv/ActiveRLEval.srv"
|
||||||
"msg/ImageFeedback.msg"
|
"msg/ImageFeedback.msg"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -3,7 +3,7 @@ uint16 max_steps
|
|||||||
uint16 nr_episodes
|
uint16 nr_episodes
|
||||||
uint16 nr_runs
|
uint16 nr_runs
|
||||||
string acquisition_function
|
string acquisition_function
|
||||||
uint16 epsilon
|
float32 epsilon
|
||||||
---
|
---
|
||||||
float32[] best_policy
|
float32[] best_policy
|
||||||
float32[] best_weights
|
float32[] best_weights
|
||||||
|
6
src/active_bo_msgs/srv/ActiveRL.srv
Normal file
6
src/active_bo_msgs/srv/ActiveRL.srv
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
float32[] old_policy
|
||||||
|
float32[] old_weights
|
||||||
|
---
|
||||||
|
float32[] new_weights
|
||||||
|
uint16 final_step
|
||||||
|
float32 reward
|
5
src/active_bo_msgs/srv/ActiveRLEval.srv
Normal file
5
src/active_bo_msgs/srv/ActiveRLEval.srv
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
float32[] old_policy
|
||||||
|
float32[] old_weights
|
||||||
|
---
|
||||||
|
float32[] new_policy
|
||||||
|
float32[] new_weights
|
@ -145,6 +145,17 @@ class BayesianOptimization:
|
|||||||
self.episode += 1
|
self.episode += 1
|
||||||
return step_count
|
return step_count
|
||||||
|
|
||||||
|
def add_new_observation(self, reward, x_new):
|
||||||
|
self.X = np.vstack((self.X, x_new))
|
||||||
|
self.Y = np.vstack((self.Y, reward))
|
||||||
|
|
||||||
|
if self.episode == 0:
|
||||||
|
self.best_reward[0] = max(self.Y)
|
||||||
|
else:
|
||||||
|
self.best_reward = np.vstack((self.best_reward, max(self.Y)))
|
||||||
|
|
||||||
|
self.episode += 1
|
||||||
|
|
||||||
def get_best_result(self):
|
def get_best_result(self):
|
||||||
y_hat = self.GP.predict(self.X)
|
y_hat = self.GP.predict(self.X)
|
||||||
idx = np.argmax(y_hat)
|
idx = np.argmax(y_hat)
|
||||||
|
@ -102,7 +102,7 @@ class Continuous_MountainCarEnv(gym.Env):
|
|||||||
|
|
||||||
metadata = {
|
metadata = {
|
||||||
"render_modes": ["human", "rgb_array"],
|
"render_modes": ["human", "rgb_array"],
|
||||||
"render_fps": 30,
|
"render_fps": 60,
|
||||||
}
|
}
|
||||||
|
|
||||||
def __init__(self, render_mode: Optional[str] = None, goal_velocity=0):
|
def __init__(self, render_mode: Optional[str] = None, goal_velocity=0):
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
from active_bo_msgs.srv import ActiveBO
|
from active_bo_msgs.srv import ActiveBO
|
||||||
|
from active_bo_msgs.srv import ActiveRL
|
||||||
|
|
||||||
import rclpy
|
import rclpy
|
||||||
from rclpy.node import Node
|
from rclpy.node import Node
|
||||||
@ -11,9 +12,11 @@ import numpy as np
|
|||||||
|
|
||||||
class ActiveBOService(Node):
|
class ActiveBOService(Node):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__('active_bo_servie')
|
super().__init__('active_bo_service')
|
||||||
self.srv = self.create_service(ActiveBO, 'active_bo_srv', self.active_bo_callback)
|
self.srv = self.create_service(ActiveBO, 'active_bo_srv', self.active_bo_callback)
|
||||||
|
|
||||||
|
self.active_rl_client = self.create_client(ActiveRL, 'active_rl_srv')
|
||||||
|
|
||||||
self.env = Continuous_MountainCarEnv()
|
self.env = Continuous_MountainCarEnv()
|
||||||
self.distance_penalty = 0
|
self.distance_penalty = 0
|
||||||
|
|
||||||
@ -38,13 +41,23 @@ class ActiveBOService(Node):
|
|||||||
nr_init=self.nr_init,
|
nr_init=self.nr_init,
|
||||||
acq=acq,
|
acq=acq,
|
||||||
nr_weights=nr_weights)
|
nr_weights=nr_weights)
|
||||||
|
|
||||||
|
arl_request = ActiveRL.Request()
|
||||||
for i in range(nr_runs):
|
for i in range(nr_runs):
|
||||||
BO.initialize()
|
BO.initialize()
|
||||||
|
|
||||||
for j in range(nr_episodes):
|
for j in range(nr_episodes):
|
||||||
# active part
|
# active part
|
||||||
if np.random.uniform(0.0, 1.0, 1) < epsilon:
|
if (j > 3) and (np.random.uniform(0.0, 1.0, 1) < epsilon):
|
||||||
pass
|
self.get_logger().info('Active User Input')
|
||||||
|
old_policy, _, old_weights = BO.get_best_result()
|
||||||
|
|
||||||
|
arl_request.old_policy = old_policy.tolist()
|
||||||
|
arl_request.old_weights = old_weights.tolist()
|
||||||
|
arl_response = self.active_rl_client.call(arl_request)
|
||||||
|
|
||||||
|
BO.add_new_observation(arl_response.reward, arl_response.new_weights)
|
||||||
|
|
||||||
# BO part
|
# BO part
|
||||||
else:
|
else:
|
||||||
x_next = BO.next_observation()
|
x_next = BO.next_observation()
|
||||||
|
116
src/active_bo_ros/active_bo_ros/active_rl_service.py
Normal file
116
src/active_bo_ros/active_bo_ros/active_rl_service.py
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
from active_bo_msgs.srv import ActiveRL
|
||||||
|
from active_bo_msgs.srv import ActiveRLEval
|
||||||
|
from active_bo_msgs.msg import ImageFeedback
|
||||||
|
|
||||||
|
import rclpy
|
||||||
|
from rclpy.node import Node
|
||||||
|
|
||||||
|
from active_bo_ros.ReinforcementLearning.ContinuousMountainCar import Continuous_MountainCarEnv
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
|
class ActiveRLService(Node):
|
||||||
|
def __init__(self):
|
||||||
|
super().__init__('active_rl_service')
|
||||||
|
self.srv = self.create_service(ActiveRL, 'active_rl_srv', self.active_rl_callback)
|
||||||
|
self.eval_srv = self.create_client(ActiveRLEval, 'active_rl_eval_srv')
|
||||||
|
|
||||||
|
self.publisher = self.create_publisher(ImageFeedback, 'rl_feedback', 1)
|
||||||
|
|
||||||
|
self.env = Continuous_MountainCarEnv(render_mode='rgb_array')
|
||||||
|
self.distance_penalty = 0
|
||||||
|
|
||||||
|
def active_rl_callback(self, request, response):
|
||||||
|
|
||||||
|
feedback_msg = ImageFeedback()
|
||||||
|
|
||||||
|
reward = 0
|
||||||
|
step_count = 0
|
||||||
|
old_policy = request.old_policy
|
||||||
|
old_weights = request.old_weights
|
||||||
|
|
||||||
|
eval_request = ActiveRLEval.Request()
|
||||||
|
eval_request.old_policy = old_policy
|
||||||
|
eval_request.old_weights = old_weights
|
||||||
|
|
||||||
|
self.env.reset()
|
||||||
|
|
||||||
|
self.get_logger().info('Best policy so far!')
|
||||||
|
|
||||||
|
for i in range(len(old_policy)):
|
||||||
|
action = old_policy[i]
|
||||||
|
output = self.env.step(action)
|
||||||
|
|
||||||
|
done = output[2]
|
||||||
|
|
||||||
|
rgb_array = self.env.render()
|
||||||
|
rgb_shape = rgb_array.shape
|
||||||
|
|
||||||
|
red = rgb_array[:, :, 0].flatten().tolist()
|
||||||
|
green = rgb_array[:, :, 1].flatten().tolist()
|
||||||
|
blue = rgb_array[:, :, 2].flatten().tolist()
|
||||||
|
|
||||||
|
feedback_msg.height = rgb_shape[0]
|
||||||
|
feedback_msg.width = rgb_shape[1]
|
||||||
|
feedback_msg.red = red
|
||||||
|
feedback_msg.green = green
|
||||||
|
feedback_msg.blue = blue
|
||||||
|
|
||||||
|
self.publisher.publish(feedback_msg)
|
||||||
|
|
||||||
|
if done:
|
||||||
|
break
|
||||||
|
|
||||||
|
self.get_logger().info('Enter new solution!')
|
||||||
|
eval_response = self.eval_srv.call(eval_request)
|
||||||
|
|
||||||
|
new_policy = eval_response.new_policy.tolist()
|
||||||
|
|
||||||
|
for i in range(len(new_policy)):
|
||||||
|
action = new_policy[i]
|
||||||
|
output = self.env.step(action)
|
||||||
|
|
||||||
|
reward += output[1]
|
||||||
|
done = output[2]
|
||||||
|
step_count += 1
|
||||||
|
|
||||||
|
rgb_array = self.env.render()
|
||||||
|
rgb_shape = rgb_array.shape
|
||||||
|
|
||||||
|
red = rgb_array[:, :, 0].flatten().tolist()
|
||||||
|
green = rgb_array[:, :, 1].flatten().tolist()
|
||||||
|
blue = rgb_array[:, :, 2].flatten().tolist()
|
||||||
|
|
||||||
|
feedback_msg.height = rgb_shape[0]
|
||||||
|
feedback_msg.width = rgb_shape[1]
|
||||||
|
feedback_msg.red = red
|
||||||
|
feedback_msg.green = green
|
||||||
|
feedback_msg.blue = blue
|
||||||
|
|
||||||
|
self.publisher.publish(feedback_msg)
|
||||||
|
|
||||||
|
if done:
|
||||||
|
break
|
||||||
|
|
||||||
|
if not done and i == len(new_policy):
|
||||||
|
distance = -(self.env.goal_position - output[0][0])
|
||||||
|
reward += distance * self.distance_penalty
|
||||||
|
|
||||||
|
response.new_weights = eval_response.Response.new_weights
|
||||||
|
response.reward = reward
|
||||||
|
response.final_step = step_count
|
||||||
|
|
||||||
|
return response
|
||||||
|
|
||||||
|
|
||||||
|
def main(args=None):
|
||||||
|
rclpy.init(args=args)
|
||||||
|
|
||||||
|
active_rl_service = ActiveRLService()
|
||||||
|
|
||||||
|
rclpy.spin(active_rl_service)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
@ -6,6 +6,7 @@ from rclpy.node import Node
|
|||||||
from active_bo_ros.PolicyModel.GaussianRBFModel import GaussianRBF
|
from active_bo_ros.PolicyModel.GaussianRBFModel import GaussianRBF
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
class PolicyService(Node):
|
class PolicyService(Node):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__('policy_service')
|
super().__init__('policy_service')
|
||||||
|
@ -1,11 +0,0 @@
|
|||||||
from launch import LaunchDescription
|
|
||||||
from launch_ros.actions import Node
|
|
||||||
|
|
||||||
def generate_launch_description():
|
|
||||||
return LaunchDescription([
|
|
||||||
Node(
|
|
||||||
package='active_bo_ros',
|
|
||||||
executable='rl_action',
|
|
||||||
name='rl_action'
|
|
||||||
),
|
|
||||||
])
|
|
@ -1,67 +0,0 @@
|
|||||||
from active_bo_msgs.srv import RLRollOut
|
|
||||||
from active_bo_msgs.msg import ImageFeedback
|
|
||||||
|
|
||||||
import rclpy
|
|
||||||
from rclpy.node import Node
|
|
||||||
|
|
||||||
from active_bo_ros.ReinforcementLearning.ContinuousMountainCar import Continuous_MountainCarEnv
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
import time
|
|
||||||
|
|
||||||
class RLService(Node):
|
|
||||||
def __init__(self):
|
|
||||||
super().__init__('rl_service')
|
|
||||||
self.srv = self.create_service(RLRollOut, 'rl_srv', self.rl_callback)
|
|
||||||
|
|
||||||
self.publisher = self.create_publisher(ImageFeedback, 'rl_feedback', 1)
|
|
||||||
|
|
||||||
self.env = Continuous_MountainCarEnv(render_mode='rgb_array')
|
|
||||||
self.distance_penalty = 0
|
|
||||||
|
|
||||||
def rl_callback(self, request, response):
|
|
||||||
|
|
||||||
feedback_msg = ImageFeedback()
|
|
||||||
|
|
||||||
reward = 0
|
|
||||||
step_count = 0
|
|
||||||
policy = request.policy
|
|
||||||
|
|
||||||
for i in range(len(policy)):
|
|
||||||
action = policy[i]
|
|
||||||
output = self.env.step(action)
|
|
||||||
|
|
||||||
reward += output[1]
|
|
||||||
done = output[2]
|
|
||||||
step_count += 1
|
|
||||||
|
|
||||||
rgb_array = output[5]
|
|
||||||
rgb_shape = rgb_array.shape
|
|
||||||
|
|
||||||
feedback_msg.height = rgb_shape[0]
|
|
||||||
feedback_msg.width = rgb_shape[1]
|
|
||||||
|
|
||||||
self.publisher.publish(feedback_msg)
|
|
||||||
|
|
||||||
if done:
|
|
||||||
break
|
|
||||||
|
|
||||||
distance = -(self.env.goal_position - output[0][0])
|
|
||||||
reward += distance * self.distance_penalty
|
|
||||||
|
|
||||||
time.sleep(0.01)
|
|
||||||
|
|
||||||
response.reward = reward
|
|
||||||
response.final_step = step_count
|
|
||||||
|
|
||||||
return response
|
|
||||||
|
|
||||||
def main(args=None):
|
|
||||||
rclpy.init(args=args)
|
|
||||||
|
|
||||||
rl_service = RLService()
|
|
||||||
|
|
||||||
rclpy.spin(rl_service)
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
main()
|
|
16
src/active_bo_ros/launch/active_bo_service.launch.py
Executable file
16
src/active_bo_ros/launch/active_bo_service.launch.py
Executable file
@ -0,0 +1,16 @@
|
|||||||
|
from launch import LaunchDescription
|
||||||
|
from launch_ros.actions import Node
|
||||||
|
|
||||||
|
def generate_launch_description():
|
||||||
|
return LaunchDescription([
|
||||||
|
Node(
|
||||||
|
package='active_bo_ros',
|
||||||
|
executable='active_bo_srv',
|
||||||
|
name='active_bo_service'
|
||||||
|
),
|
||||||
|
Node(
|
||||||
|
package='active_bo_ros',
|
||||||
|
executable='active_rl_srv',
|
||||||
|
name='active_rl_service'
|
||||||
|
),
|
||||||
|
])
|
@ -29,7 +29,9 @@ setup(
|
|||||||
'console_scripts': [
|
'console_scripts': [
|
||||||
'policy_srv = active_bo_ros.policy_service:main',
|
'policy_srv = active_bo_ros.policy_service:main',
|
||||||
'rl_srv = active_bo_ros.rl_service:main',
|
'rl_srv = active_bo_ros.rl_service:main',
|
||||||
'bo_srv = active_bo_ros.bo_service:main'
|
'bo_srv = active_bo_ros.bo_service:main',
|
||||||
|
'active_bo_srv = active_bo_ros.active_bo_service:main',
|
||||||
|
'active_rl_srv = active_bo_ros.active_rl_service:main',
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
Loading…
Reference in New Issue
Block a user