Added BOTorch support
This commit is contained in:
parent
a17dc77234
commit
6cdb7f8711
@ -97,8 +97,6 @@ class BayesianOptimization:
|
|||||||
lower=self.lower_bound,
|
lower=self.lower_bound,
|
||||||
upper=self.upper_bound)
|
upper=self.upper_bound)
|
||||||
|
|
||||||
return x_next
|
|
||||||
|
|
||||||
elif self.acq == "Probability of Improvement":
|
elif self.acq == "Probability of Improvement":
|
||||||
x_next = ProbabilityOfImprovement(self.GP,
|
x_next = ProbabilityOfImprovement(self.GP,
|
||||||
self.X,
|
self.X,
|
||||||
@ -109,8 +107,6 @@ class BayesianOptimization:
|
|||||||
lower=self.lower_bound,
|
lower=self.lower_bound,
|
||||||
upper=self.upper_bound)
|
upper=self.upper_bound)
|
||||||
|
|
||||||
return x_next
|
|
||||||
|
|
||||||
elif self.acq == "Upper Confidence Bound":
|
elif self.acq == "Upper Confidence Bound":
|
||||||
x_next = ConfidenceBound(self.GP,
|
x_next = ConfidenceBound(self.GP,
|
||||||
self.X,
|
self.X,
|
||||||
@ -121,11 +117,11 @@ class BayesianOptimization:
|
|||||||
lower=self.lower_bound,
|
lower=self.lower_bound,
|
||||||
upper=self.upper_bound)
|
upper=self.upper_bound)
|
||||||
|
|
||||||
return x_next
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
|
return x_next
|
||||||
|
|
||||||
def eval_new_observation(self, x_next):
|
def eval_new_observation(self, x_next):
|
||||||
self.policy_model.weights = x_next
|
self.policy_model.weights = x_next
|
||||||
policy = self.policy_model.rollout()
|
policy = self.policy_model.rollout()
|
||||||
|
@ -0,0 +1,188 @@
|
|||||||
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
from botorch.models import SingleTaskGP
|
||||||
|
from botorch.optim import optimize_acqf
|
||||||
|
from gpytorch.kernels import MaternKernel
|
||||||
|
from botorch.fit import fit_gpytorch_mll
|
||||||
|
from gpytorch.mlls import ExactMarginalLogLikelihood
|
||||||
|
|
||||||
|
from botorch.acquisition import UpperConfidenceBound, ExpectedImprovement, ProbabilityOfImprovement
|
||||||
|
|
||||||
|
import warnings
|
||||||
|
from botorch.exceptions.warnings import InputDataWarning, BadInitialCandidatesWarning
|
||||||
|
|
||||||
|
from active_bo_ros.PolicyModel.GaussianRBFModel import GaussianRBF
|
||||||
|
|
||||||
|
torch.set_default_dtype(torch.float64)
|
||||||
|
warnings.filterwarnings("ignore", category=InputDataWarning)
|
||||||
|
warnings.filterwarnings("ignore", category=BadInitialCandidatesWarning)
|
||||||
|
|
||||||
|
|
||||||
|
class BayesianOptimization:
|
||||||
|
def __init__(self, env, nr_steps, nr_init=5, acq="Expected Improvement", nr_weights=6, policy_seed=None):
|
||||||
|
self.env = env
|
||||||
|
self.nr_init = nr_init
|
||||||
|
self.acq = acq
|
||||||
|
|
||||||
|
self.X = None
|
||||||
|
self.X_np = None
|
||||||
|
self.Y_np = None
|
||||||
|
self.GP = None
|
||||||
|
|
||||||
|
self.episode = 0
|
||||||
|
self.counter_array = np.empty((1, 1))
|
||||||
|
self.best_reward = np.empty((1, 1))
|
||||||
|
self.distance_penalty = 0
|
||||||
|
|
||||||
|
self.nr_policy_weights = nr_weights
|
||||||
|
self.nr_steps = nr_steps
|
||||||
|
self.policy_seed = policy_seed
|
||||||
|
|
||||||
|
self.lower_bound = 0
|
||||||
|
self.upper_bound = 1.0
|
||||||
|
|
||||||
|
self.bounds = torch.t(torch.tensor([[self.lower_bound, self.upper_bound]] * self.nr_policy_weights))
|
||||||
|
|
||||||
|
self.policy_model = GaussianRBF(self.nr_policy_weights,
|
||||||
|
self.nr_steps,
|
||||||
|
self.policy_seed,
|
||||||
|
self.lower_bound,
|
||||||
|
self.upper_bound)
|
||||||
|
|
||||||
|
self.eval_X = 200
|
||||||
|
self.eval_restarts = 5
|
||||||
|
|
||||||
|
def reset_bo(self):
|
||||||
|
self.counter_array = np.empty((1, 1))
|
||||||
|
self.GP = None
|
||||||
|
self.episode = 0
|
||||||
|
self.best_reward = np.empty((1, 1))
|
||||||
|
|
||||||
|
def runner(self, policy):
|
||||||
|
env_reward = 0.0
|
||||||
|
step_count = 0
|
||||||
|
|
||||||
|
for i in range(len(policy)):
|
||||||
|
action = policy[i]
|
||||||
|
output = self.env.step(action)
|
||||||
|
|
||||||
|
env_reward += output[1]
|
||||||
|
done = output[2]
|
||||||
|
step_count += 1
|
||||||
|
|
||||||
|
if done:
|
||||||
|
self.counter_array = np.vstack((self.counter_array, step_count))
|
||||||
|
break
|
||||||
|
|
||||||
|
if not done and i == len(policy):
|
||||||
|
distance = -(self.env.goal_position - output[0][0])
|
||||||
|
env_reward += distance * self.distance_penalty
|
||||||
|
self.counter_array = np.vstack((self.counter_array, step_count))
|
||||||
|
|
||||||
|
self.env.reset()
|
||||||
|
return env_reward, step_count
|
||||||
|
|
||||||
|
def initialize(self):
|
||||||
|
self.env.reset()
|
||||||
|
self.reset_bo()
|
||||||
|
|
||||||
|
self.X = torch.zeros((self.nr_init, self.nr_policy_weights))
|
||||||
|
self.X_np = np.zeros((self.nr_init, self.nr_policy_weights))
|
||||||
|
self.Y_np = np.zeros((self.nr_init, 1))
|
||||||
|
|
||||||
|
for i in range(self.nr_init):
|
||||||
|
self.policy_model.random_policy()
|
||||||
|
self.X_np[i, :] = self.policy_model.weights.T.clip(min=-1.0, max=1.0)
|
||||||
|
self.X[i, :] = torch.tensor((self.policy_model.weights.T.clip(min=-1.0, max=1.0) + 1) / 2)
|
||||||
|
policy = self.policy_model.rollout()
|
||||||
|
|
||||||
|
reward, step_count = self.runner(policy)
|
||||||
|
|
||||||
|
self.Y_np[i] = reward
|
||||||
|
|
||||||
|
Y = torch.tensor(self.Y_np)
|
||||||
|
|
||||||
|
self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=MaternKernel(nu=1.5))
|
||||||
|
mll = ExactMarginalLogLikelihood(self.GP.likelihood, self.GP)
|
||||||
|
fit_gpytorch_mll(mll)
|
||||||
|
|
||||||
|
def next_observation(self):
|
||||||
|
if self.acq == "Expected Improvement":
|
||||||
|
ei = ExpectedImprovement(self.GP, best_f=self.best_reward[-1][0], maximize=True)
|
||||||
|
x_next, _ = optimize_acqf(ei,
|
||||||
|
bounds=self.bounds,
|
||||||
|
num_restarts=self.eval_restarts,
|
||||||
|
raw_samples=self.eval_X,
|
||||||
|
q=1)
|
||||||
|
|
||||||
|
elif self.acq == "Probability of Improvement":
|
||||||
|
poi = ProbabilityOfImprovement(self.GP, best_f=self.best_reward[-1][0], maximize=True)
|
||||||
|
x_next, _ = optimize_acqf(poi,
|
||||||
|
bounds=self.bounds,
|
||||||
|
num_restarts=self.eval_restarts,
|
||||||
|
raw_samples=self.eval_X,
|
||||||
|
q=1)
|
||||||
|
|
||||||
|
elif self.acq == "Upper Confidence Bound":
|
||||||
|
ucb = UpperConfidenceBound(self.GP, beta=2.576, maximize=True)
|
||||||
|
x_next, _ = optimize_acqf(ucb,
|
||||||
|
bounds=self.bounds,
|
||||||
|
num_restarts=self.eval_restarts,
|
||||||
|
raw_samples=self.eval_X,
|
||||||
|
q=1)
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
return torch.t(x_next)
|
||||||
|
|
||||||
|
def eval_new_observation(self, x_next):
|
||||||
|
new_weight = x_next.detach().numpy() * 2 - 1
|
||||||
|
self.policy_model.weights = new_weight
|
||||||
|
policy = self.policy_model.rollout()
|
||||||
|
|
||||||
|
reward, step_count = self.runner(policy)
|
||||||
|
|
||||||
|
self.X_np = np.vstack((self.X_np, new_weight.reshape(1, -1)))
|
||||||
|
self.X = torch.vstack((self.X, x_next.reshape(1, -1)))
|
||||||
|
self.Y_np = np.vstack((self.Y_np, reward))
|
||||||
|
|
||||||
|
Y = torch.tensor(self.Y_np)
|
||||||
|
|
||||||
|
self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=MaternKernel(nu=1.5))
|
||||||
|
mll = ExactMarginalLogLikelihood(self.GP.likelihood, self.GP)
|
||||||
|
fit_gpytorch_mll(mll)
|
||||||
|
|
||||||
|
if self.episode == 0:
|
||||||
|
self.best_reward[0] = max(self.Y_np)
|
||||||
|
else:
|
||||||
|
self.best_reward = np.vstack((self.best_reward, max(self.Y_np)))
|
||||||
|
|
||||||
|
self.episode += 1
|
||||||
|
return step_count
|
||||||
|
|
||||||
|
def add_new_observation(self, reward, x_new):
|
||||||
|
self.X = torch.vstack((self.X, torch.tensor(x_new)))
|
||||||
|
self.Y_np = np.vstack((self.Y_np, reward))
|
||||||
|
|
||||||
|
if self.episode == 0:
|
||||||
|
self.best_reward[0] = max(self.Y_np)
|
||||||
|
else:
|
||||||
|
self.best_reward = np.vstack((self.best_reward, max(self.Y_np)))
|
||||||
|
|
||||||
|
self.episode += 1
|
||||||
|
|
||||||
|
def get_best_result(self):
|
||||||
|
Y = torch.tensor(self.Y_np)
|
||||||
|
self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=MaternKernel(nu=1.5))
|
||||||
|
mll = ExactMarginalLogLikelihood(self.GP.likelihood, self.GP)
|
||||||
|
fit_gpytorch_mll(mll)
|
||||||
|
|
||||||
|
y_hat = self.GP.posterior(self.X)
|
||||||
|
idx = torch.argmax(y_hat.mean)
|
||||||
|
x_max = self.X_np[idx]
|
||||||
|
|
||||||
|
self.policy_model.weights = x_max
|
||||||
|
best_policy = self.policy_model.rollout().reshape(-1, )
|
||||||
|
|
||||||
|
return best_policy, y_hat.mean[idx].detach().numpy(), x_max
|
72
src/active_bo_ros/active_bo_ros/bo_torch_service.py
Normal file
72
src/active_bo_ros/active_bo_ros/bo_torch_service.py
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
from active_bo_msgs.srv import BO
|
||||||
|
|
||||||
|
import rclpy
|
||||||
|
from rclpy.node import Node
|
||||||
|
|
||||||
|
from active_bo_ros.BayesianOptimization.BayesianOptimizationTorch import BayesianOptimization
|
||||||
|
from active_bo_ros.ReinforcementLearning.ContinuousMountainCar import Continuous_MountainCarEnv
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
|
||||||
|
|
||||||
|
class BOService(Node):
|
||||||
|
def __init__(self):
|
||||||
|
super().__init__('bo_service')
|
||||||
|
self.srv = self.create_service(BO, 'bo_srv', self.bo_callback)
|
||||||
|
|
||||||
|
self.env = Continuous_MountainCarEnv()
|
||||||
|
self.distance_penalty = 0
|
||||||
|
|
||||||
|
self.nr_init = 3
|
||||||
|
|
||||||
|
def bo_callback(self, request, response):
|
||||||
|
self.get_logger().info('Bayesian Optimization Service started!')
|
||||||
|
nr_weights = request.nr_weights
|
||||||
|
max_steps = request.max_steps
|
||||||
|
nr_episodes = request.nr_episodes
|
||||||
|
nr_runs = request.nr_runs
|
||||||
|
acq = request.acquisition_function
|
||||||
|
self.get_logger().info(acq)
|
||||||
|
|
||||||
|
reward = np.zeros((nr_episodes, nr_runs))
|
||||||
|
best_pol_reward = np.zeros((1, nr_runs))
|
||||||
|
best_policy = np.zeros((max_steps, nr_runs))
|
||||||
|
best_weights = np.zeros((nr_weights, nr_runs))
|
||||||
|
|
||||||
|
BO = BayesianOptimization(self.env,
|
||||||
|
max_steps,
|
||||||
|
nr_init=self.nr_init,
|
||||||
|
acq=acq,
|
||||||
|
nr_weights=nr_weights)
|
||||||
|
|
||||||
|
for i in range(nr_runs):
|
||||||
|
BO.initialize()
|
||||||
|
|
||||||
|
for j in range(nr_episodes):
|
||||||
|
x_next = BO.next_observation()
|
||||||
|
BO.eval_new_observation(x_next)
|
||||||
|
|
||||||
|
best_policy[:, i], best_pol_reward[:, i], best_weights[:, i] = BO.get_best_result()
|
||||||
|
|
||||||
|
reward[:, i] = BO.best_reward.T
|
||||||
|
|
||||||
|
response.reward_mean = np.mean(reward, axis=1).tolist()
|
||||||
|
response.reward_std = np.std(reward, axis=1).tolist()
|
||||||
|
|
||||||
|
best_policy_idx = np.argmax(best_pol_reward)
|
||||||
|
response.best_weights = best_weights[:, best_policy_idx].tolist()
|
||||||
|
response.best_policy = best_policy[:, best_policy_idx].tolist()
|
||||||
|
return response
|
||||||
|
|
||||||
|
|
||||||
|
def main(args=None):
|
||||||
|
rclpy.init(args=args)
|
||||||
|
|
||||||
|
bo_service = BOService()
|
||||||
|
|
||||||
|
rclpy.spin(bo_service)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
12
src/active_bo_ros/launch/bo_torch_service.launch.py
Executable file
12
src/active_bo_ros/launch/bo_torch_service.launch.py
Executable file
@ -0,0 +1,12 @@
|
|||||||
|
from launch import LaunchDescription
|
||||||
|
from launch_ros.actions import Node
|
||||||
|
|
||||||
|
|
||||||
|
def generate_launch_description():
|
||||||
|
return LaunchDescription([
|
||||||
|
Node(
|
||||||
|
package='active_bo_ros',
|
||||||
|
executable='bo_torch_srv',
|
||||||
|
name='bo_srv'
|
||||||
|
),
|
||||||
|
])
|
@ -30,6 +30,7 @@ setup(
|
|||||||
'policy_srv = active_bo_ros.policy_service:main',
|
'policy_srv = active_bo_ros.policy_service:main',
|
||||||
'rl_srv = active_bo_ros.rl_service:main',
|
'rl_srv = active_bo_ros.rl_service:main',
|
||||||
'bo_srv = active_bo_ros.bo_service:main',
|
'bo_srv = active_bo_ros.bo_service:main',
|
||||||
|
'bo_torch_srv = active_bo_ros.bo_torch_service:main',
|
||||||
'active_bo_srv = active_bo_ros.active_bo_service:main',
|
'active_bo_srv = active_bo_ros.active_bo_service:main',
|
||||||
'active_rl_srv = active_bo_ros.active_rl_service:main',
|
'active_rl_srv = active_bo_ros.active_rl_service:main',
|
||||||
'active_bo_topic = active_bo_ros.active_bo_topic:main',
|
'active_bo_topic = active_bo_ros.active_bo_topic:main',
|
||||||
|
Loading…
Reference in New Issue
Block a user