started with active bo

Niko Feith 2023-03-16 14:27:41 +01:00
parent 1416e72675
commit 6ed36c0b84
9 changed files with 159 additions and 0 deletions

View File

@@ -23,6 +23,7 @@ rosidl_generate_interfaces(${PROJECT_NAME}
  "srv/WeightToPolicy.srv"
  "srv/RLRollOut.srv"
  "srv/BO.srv"
  "srv/ActiveBO.srv"
  "msg/ImageFeedback.msg"
)

View File

@@ -0,0 +1,11 @@
uint16 nr_weights
uint16 max_steps
uint16 nr_episodes
uint16 nr_runs
string acquisition_function
float32 epsilon  # probability of taking the active (user-query) branch per episode; must be a float so values in (0, 1) are representable
---
float32[] best_policy
float32[] best_weights
float32[] reward_mean
float32[] reward_std
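For orientation, here is a minimal rclpy client sketch for this service. This is a sketch only: the node name active_bo_client and all request values are made up for illustration; the service name active_bo_srv and the field names come from this commit.

import rclpy
from rclpy.node import Node
from active_bo_msgs.srv import ActiveBO


class ActiveBOClient(Node):
    def __init__(self):
        super().__init__('active_bo_client')  # hypothetical client node name
        # 'active_bo_srv' matches the service created by ActiveBOService below
        self.client = self.create_client(ActiveBO, 'active_bo_srv')


def main():
    rclpy.init()
    node = ActiveBOClient()
    node.client.wait_for_service()

    request = ActiveBO.Request()
    request.nr_weights = 8       # illustrative values only
    request.max_steps = 500
    request.nr_episodes = 20
    request.nr_runs = 5
    request.acquisition_function = 'ExpectedImprovement'  # accepted strings are an assumption
    request.epsilon = 0.1        # probability of the active branch

    future = node.client.call_async(request)
    rclpy.spin_until_future_complete(node, future)
    response = future.result()
    print(response.reward_mean, response.reward_std)

    node.destroy_node()
    rclpy.shutdown()


if __name__ == '__main__':
    main()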

View File

@@ -1,5 +1,6 @@
import numpy as np


def ConfidenceBound(gp, X, nr_test, nr_weights, lam=1.2, seed=None, lower=-1.0, upper=1.0):
    y_hat = gp.predict(X)
    best_y = max(y_hat)
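The diff truncates the function body. For reference only, a textbook upper-confidence-bound acquisition over randomly sampled candidate weights could look like the sketch below; the candidate sampling, the sklearn-style gp.predict(..., return_std=True) call, and the argmax return convention are assumptions, not this repository's code.

import numpy as np


def ucb_sketch(gp, nr_test, nr_weights, lam=1.2, seed=None, lower=-1.0, upper=1.0):
    # Sample nr_test candidate weight vectors uniformly in [lower, upper]
    rng = np.random.default_rng(seed)
    X_test = rng.uniform(lower, upper, (nr_test, nr_weights))

    # Posterior mean and standard deviation of the GP at the candidates
    mu, sigma = gp.predict(X_test, return_std=True)

    # UCB trades off exploitation (mu) against exploration (lam * sigma)
    ucb = mu + lam * sigma
    return X_test[np.argmax(ucb)]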

View File

@@ -1,6 +1,7 @@
import numpy as np
from scipy.stats import norm


def ExpectedImprovement(gp, X, nr_test, nr_weights, kappa=2.576, seed=None, lower=-1.0, upper=1.0):
    y_hat = gp.predict(X)
    best_y = max(y_hat)
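This body is also cut off by the diff. The closed-form expected improvement for a Gaussian posterior, with kappa read as an exploration margin (an assumption; kappa=2.576 would more commonly appear in a confidence-bound rule), is sketched below:

import numpy as np
from scipy.stats import norm


def ei_sketch(gp, X_test, best_y, kappa=2.576):
    # Posterior mean/std at the candidates (sklearn-style GP assumed)
    mu, sigma = gp.predict(X_test, return_std=True)
    sigma = np.maximum(sigma, 1e-9)  # guard against zero predictive variance

    imp = mu - best_y - kappa  # improvement over the incumbent, minus a margin
    z = imp / sigma
    # Closed-form EI under a Gaussian posterior
    return imp * norm.cdf(z) + sigma * norm.pdf(z)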

View File

@@ -1,6 +1,7 @@
import numpy as np
from scipy.stats import norm


def ProbabilityOfImprovement(gp, X, nr_test, nr_weights, kappa=2.576, seed=None, lower=-1.0, upper=1.0):
    y_hat = gp.predict(X)
    best_y = max(y_hat)
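For completeness, the matching probability-of-improvement formula, under the same assumptions as the sketches above:

import numpy as np
from scipy.stats import norm


def pi_sketch(gp, X_test, best_y, kappa=2.576):
    mu, sigma = gp.predict(X_test, return_std=True)
    sigma = np.maximum(sigma, 1e-9)
    # Probability that a candidate beats the incumbent by at least kappa
    return norm.cdf((mu - best_y - kappa) / sigma)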

View File

@@ -0,0 +1,75 @@
from active_bo_msgs.srv import ActiveBO

import rclpy
from rclpy.node import Node

from active_bo_ros.BayesianOptimization.BayesianOptimization import BayesianOptimization
from active_bo_ros.ReinforcementLearning.ContinuousMountainCar import Continuous_MountainCarEnv

import numpy as np


class ActiveBOService(Node):
    def __init__(self):
        super().__init__('active_bo_service')

        self.srv = self.create_service(ActiveBO, 'active_bo_srv', self.active_bo_callback)

        self.env = Continuous_MountainCarEnv()
        self.distance_penalty = 0

        self.nr_init = 3

    def active_bo_callback(self, request, response):
        self.get_logger().info('Active Bayesian Optimization Service started!')

        nr_weights = request.nr_weights
        max_steps = request.max_steps
        nr_episodes = request.nr_episodes
        nr_runs = request.nr_runs
        acq = request.acquisition_function
        epsilon = request.epsilon

        reward = np.zeros((nr_episodes, nr_runs))
        best_pol_reward = np.zeros((1, nr_runs))
        best_policy = np.zeros((max_steps, nr_runs))
        best_weights = np.zeros((nr_weights, nr_runs))

        BO = BayesianOptimization(self.env,
                                  max_steps,
                                  nr_init=self.nr_init,
                                  acq=acq,
                                  nr_weights=nr_weights)

        for i in range(nr_runs):
            BO.initialize()

            for j in range(nr_episodes):
                # active part: with probability epsilon, query the user
                if np.random.uniform(0.0, 1.0) < epsilon:
                    # placeholder; not implemented yet in this commit
                    pass
                # BO part: otherwise let the acquisition function pick the next query
                else:
                    x_next = BO.next_observation()
                    BO.eval_new_observation(x_next)

            best_policy[:, i], best_pol_reward[:, i], best_weights[:, i] = BO.get_best_result()
            reward[:, i] = BO.best_reward.T

        response.reward_mean = np.mean(reward, axis=1).tolist()
        response.reward_std = np.std(reward, axis=1).tolist()

        best_policy_idx = np.argmax(best_pol_reward)
        response.best_weights = best_weights[:, best_policy_idx].tolist()
        response.best_policy = best_policy[:, best_policy_idx].tolist()

        return response


def main(args=None):
    rclpy.init(args=args)

    active_bo_service = ActiveBOService()

    rclpy.spin(active_bo_service)

    rclpy.shutdown()


if __name__ == '__main__':
    main()
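The diff does not show the setup.py entry point for this node; assuming the console script is registered as active_bo_service (a hypothetical name), the node would be started with:

ros2 run active_bo_ros active_bo_service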

View File

@@ -0,0 +1,47 @@
from launch import LaunchDescription
from launch.actions import IncludeLaunchDescription
from launch.launch_description_sources import PythonLaunchDescriptionSource
from ament_index_python import get_package_share_directory
import os


def generate_launch_description():
    websocket_launch = IncludeLaunchDescription(
        PythonLaunchDescriptionSource(
            os.path.join(
                get_package_share_directory('active_bo_ros'),
                'rosbridge_server.launch.py'
            )
        )
    )

    policy_launch = IncludeLaunchDescription(
        PythonLaunchDescriptionSource(
            os.path.join(
                get_package_share_directory('active_bo_ros'),
                'policy_service.launch.py'
            )
        )
    )

    rl_launch = IncludeLaunchDescription(
        PythonLaunchDescriptionSource(
            os.path.join(
                get_package_share_directory('active_bo_ros'),
                'rl_service.launch.py'
            )
        )
    )

    bo_launch = IncludeLaunchDescription(
        PythonLaunchDescriptionSource(
            os.path.join(
                get_package_share_directory('active_bo_ros'),
                'bo_service.launch.py'
            )
        )
    )

    return LaunchDescription([
        websocket_launch,
        policy_launch,
        rl_launch,
        bo_launch
    ])
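The installed name of this combined launch file is not visible in the diff; assuming it is installed as, say, main.launch.py (hypothetical), the whole stack would come up with:

ros2 launch active_bo_ros main.launch.py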

View File

@@ -1,6 +1,7 @@
from launch import LaunchDescription
from launch_ros.actions import Node


def generate_launch_description():
    return LaunchDescription([
        Node(

View File

@@ -0,0 +1,21 @@
from launch import LaunchDescription
from launch.actions import IncludeLaunchDescription
from launch_xml.launch_description_sources import XMLLaunchDescriptionSource
from ament_index_python import get_package_share_directory
import os


def generate_launch_description():
    rosbridge_launch = IncludeLaunchDescription(
        XMLLaunchDescriptionSource(
            os.path.join(
                get_package_share_directory('rosbridge_server'),
                'launch/rosbridge_websocket_launch.xml'
            )
        )
    )

    return LaunchDescription([
        rosbridge_launch
    ])