started with active bo
This commit is contained in:
parent
1416e72675
commit
6ed36c0b84
@ -23,6 +23,7 @@ rosidl_generate_interfaces(${PROJECT_NAME}
|
|||||||
"srv/WeightToPolicy.srv"
|
"srv/WeightToPolicy.srv"
|
||||||
"srv/RLRollOut.srv"
|
"srv/RLRollOut.srv"
|
||||||
"srv/BO.srv"
|
"srv/BO.srv"
|
||||||
|
"srv/ActiveBO.srv"
|
||||||
"msg/ImageFeedback.msg"
|
"msg/ImageFeedback.msg"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
11
src/active_bo_msgs/srv/ActiveBO.srv
Normal file
11
src/active_bo_msgs/srv/ActiveBO.srv
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
uint16 nr_weights
|
||||||
|
uint16 max_steps
|
||||||
|
uint16 nr_episodes
|
||||||
|
uint16 nr_runs
|
||||||
|
string acquisition_function
|
||||||
|
uint16 epsilon
|
||||||
|
---
|
||||||
|
float32[] best_policy
|
||||||
|
float32[] best_weights
|
||||||
|
float32[] reward_mean
|
||||||
|
float32[] reward_std
|
@ -1,5 +1,6 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
def ConfidenceBound(gp, X, nr_test, nr_weights, lam=1.2, seed=None, lower=-1.0, upper=1.0):
|
def ConfidenceBound(gp, X, nr_test, nr_weights, lam=1.2, seed=None, lower=-1.0, upper=1.0):
|
||||||
y_hat = gp.predict(X)
|
y_hat = gp.predict(X)
|
||||||
best_y = max(y_hat)
|
best_y = max(y_hat)
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
from scipy.stats import norm
|
from scipy.stats import norm
|
||||||
|
|
||||||
|
|
||||||
def ExpectedImprovement(gp, X, nr_test, nr_weights, kappa=2.576, seed=None, lower=-1.0, upper=1.0):
|
def ExpectedImprovement(gp, X, nr_test, nr_weights, kappa=2.576, seed=None, lower=-1.0, upper=1.0):
|
||||||
y_hat = gp.predict(X)
|
y_hat = gp.predict(X)
|
||||||
best_y = max(y_hat)
|
best_y = max(y_hat)
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
from scipy.stats import norm
|
from scipy.stats import norm
|
||||||
|
|
||||||
|
|
||||||
def ProbabilityOfImprovement(gp, X, nr_test, nr_weights, kappa=2.576, seed=None, lower=-1.0, upper=1.0):
|
def ProbabilityOfImprovement(gp, X, nr_test, nr_weights, kappa=2.576, seed=None, lower=-1.0, upper=1.0):
|
||||||
y_hat = gp.predict(X)
|
y_hat = gp.predict(X)
|
||||||
best_y = max(y_hat)
|
best_y = max(y_hat)
|
||||||
|
75
src/active_bo_ros/active_bo_ros/active_bo_service.py
Normal file
75
src/active_bo_ros/active_bo_ros/active_bo_service.py
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
from active_bo_msgs.srv import ActiveBO
|
||||||
|
|
||||||
|
import rclpy
|
||||||
|
from rclpy.node import Node
|
||||||
|
|
||||||
|
from active_bo_ros.BayesianOptimization.BayesianOptimization import BayesianOptimization
|
||||||
|
from active_bo_ros.ReinforcementLearning.ContinuousMountainCar import Continuous_MountainCarEnv
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
|
class ActiveBOService(Node):
|
||||||
|
def __init__(self):
|
||||||
|
super().__init__('active_bo_servie')
|
||||||
|
self.srv = self.create_service(ActiveBO, 'active_bo_srv', self.active_bo_callback)
|
||||||
|
|
||||||
|
self.env = Continuous_MountainCarEnv()
|
||||||
|
self.distance_penalty = 0
|
||||||
|
|
||||||
|
self.nr_init = 3
|
||||||
|
|
||||||
|
def active_bo_callback(self, request, response):
|
||||||
|
self.get_logger().info('Active Bayesian Optimization Service started!')
|
||||||
|
nr_weights = request.nr_weights
|
||||||
|
max_steps = request.max_steps
|
||||||
|
nr_episodes = request.nr_episodes
|
||||||
|
nr_runs = request.nr_runs
|
||||||
|
acq = request.acquisition_function
|
||||||
|
epsilon = request.epsilon
|
||||||
|
|
||||||
|
reward = np.zeros((nr_episodes, nr_runs))
|
||||||
|
best_pol_reward = np.zeros((1, nr_runs))
|
||||||
|
best_policy = np.zeros((max_steps, nr_runs))
|
||||||
|
best_weights = np.zeros((nr_weights, nr_runs))
|
||||||
|
|
||||||
|
BO = BayesianOptimization(self.env,
|
||||||
|
max_steps,
|
||||||
|
nr_init=self.nr_init,
|
||||||
|
acq=acq,
|
||||||
|
nr_weights=nr_weights)
|
||||||
|
for i in range(nr_runs):
|
||||||
|
BO.initialize()
|
||||||
|
|
||||||
|
for j in range(nr_episodes):
|
||||||
|
# active part
|
||||||
|
if np.random.uniform(0.0, 1.0, 1) < epsilon:
|
||||||
|
pass
|
||||||
|
# BO part
|
||||||
|
else:
|
||||||
|
x_next = BO.next_observation()
|
||||||
|
BO.eval_new_observation(x_next)
|
||||||
|
|
||||||
|
best_policy[:, i], best_pol_reward[:, i], best_weights[:, i] = BO.get_best_result()
|
||||||
|
|
||||||
|
reward[:, i] = BO.best_reward.T
|
||||||
|
|
||||||
|
response.reward_mean = np.mean(reward, axis=1).tolist()
|
||||||
|
response.reward_std = np.std(reward, axis=1).tolist()
|
||||||
|
|
||||||
|
best_policy_idx = np.argmax(best_pol_reward)
|
||||||
|
response.best_weights = best_weights[:, best_policy_idx].tolist()
|
||||||
|
response.best_policy = best_policy[:, best_policy_idx].tolist()
|
||||||
|
return response
|
||||||
|
|
||||||
|
|
||||||
|
def main(args=None):
|
||||||
|
rclpy.init(args=args)
|
||||||
|
|
||||||
|
active_bo_service = ActiveBOService()
|
||||||
|
|
||||||
|
rclpy.spin(active_bo_service)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
47
src/active_bo_ros/launch/launch_active_bo.launch.py
Executable file
47
src/active_bo_ros/launch/launch_active_bo.launch.py
Executable file
@ -0,0 +1,47 @@
|
|||||||
|
from launch import LaunchDescription
|
||||||
|
from launch.actions import IncludeLaunchDescription
|
||||||
|
from launch.launch_description_sources import PythonLaunchDescriptionSource
|
||||||
|
|
||||||
|
from ament_index_python import get_package_share_directory
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
def generate_launch_description():
|
||||||
|
websocket_launch = IncludeLaunchDescription(
|
||||||
|
PythonLaunchDescriptionSource(
|
||||||
|
os.path.join(
|
||||||
|
get_package_share_directory('active_bo_ros'),
|
||||||
|
'rosbridge_server.launch.py'
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
policy_launch = IncludeLaunchDescription(
|
||||||
|
PythonLaunchDescriptionSource(
|
||||||
|
os.path.join(
|
||||||
|
get_package_share_directory('active_bo_ros'),
|
||||||
|
'policy_service.launch.py'
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
rl_launch = IncludeLaunchDescription(
|
||||||
|
PythonLaunchDescriptionSource(
|
||||||
|
os.path.join(
|
||||||
|
get_package_share_directory('active_bo_ros'),
|
||||||
|
'rl_service.launch.py'
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
bo_launch = IncludeLaunchDescription(
|
||||||
|
PythonLaunchDescriptionSource(
|
||||||
|
os.path.join(
|
||||||
|
get_package_share_directory('active_bo_ros'),
|
||||||
|
'bo_service.launch.py'
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
return LaunchDescription([
|
||||||
|
websocket_launch,
|
||||||
|
policy_launch,
|
||||||
|
rl_launch,
|
||||||
|
bo_launch
|
||||||
|
])
|
@ -1,6 +1,7 @@
|
|||||||
from launch import LaunchDescription
|
from launch import LaunchDescription
|
||||||
from launch_ros.actions import Node
|
from launch_ros.actions import Node
|
||||||
|
|
||||||
|
|
||||||
def generate_launch_description():
|
def generate_launch_description():
|
||||||
return LaunchDescription([
|
return LaunchDescription([
|
||||||
Node(
|
Node(
|
||||||
|
21
src/active_bo_ros/launch/rosbridge_server.launch.py
Executable file
21
src/active_bo_ros/launch/rosbridge_server.launch.py
Executable file
@ -0,0 +1,21 @@
|
|||||||
|
from launch import LaunchDescription
|
||||||
|
from launch.actions import IncludeLaunchDescription
|
||||||
|
from launch_xml.launch_description_sources import XMLLaunchDescriptionSource
|
||||||
|
|
||||||
|
from ament_index_python import get_package_share_directory
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
def generate_launch_description():
|
||||||
|
rosbridge_launch = IncludeLaunchDescription(
|
||||||
|
XMLLaunchDescriptionSource(
|
||||||
|
os.path.join(
|
||||||
|
get_package_share_directory('rosbridge_server'),
|
||||||
|
'launch/rosbridge_websocket_launch.xml'
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
return LaunchDescription([
|
||||||
|
rosbridge_launch
|
||||||
|
])
|
Loading…
Reference in New Issue
Block a user