diff --git a/src/active_bo_ros/active_bo_ros/BayesianOptimization/BayesianOptimization.py b/src/active_bo_ros/active_bo_ros/BayesianOptimization/BayesianOptimization.py index c4b2f6b..4be3572 100644 --- a/src/active_bo_ros/active_bo_ros/BayesianOptimization/BayesianOptimization.py +++ b/src/active_bo_ros/active_bo_ros/BayesianOptimization/BayesianOptimization.py @@ -97,8 +97,6 @@ class BayesianOptimization: lower=self.lower_bound, upper=self.upper_bound) - return x_next - elif self.acq == "Probability of Improvement": x_next = ProbabilityOfImprovement(self.GP, self.X, @@ -109,8 +107,6 @@ class BayesianOptimization: lower=self.lower_bound, upper=self.upper_bound) - return x_next - elif self.acq == "Upper Confidence Bound": x_next = ConfidenceBound(self.GP, self.X, @@ -121,11 +117,11 @@ class BayesianOptimization: lower=self.lower_bound, upper=self.upper_bound) - return x_next - else: raise NotImplementedError + return x_next + def eval_new_observation(self, x_next): self.policy_model.weights = x_next policy = self.policy_model.rollout() diff --git a/src/active_bo_ros/active_bo_ros/BayesianOptimization/BayesianOptimizationTorch.py b/src/active_bo_ros/active_bo_ros/BayesianOptimization/BayesianOptimizationTorch.py new file mode 100644 index 0000000..112b9c5 --- /dev/null +++ b/src/active_bo_ros/active_bo_ros/BayesianOptimization/BayesianOptimizationTorch.py @@ -0,0 +1,188 @@ +import numpy as np +import torch +from botorch.models import SingleTaskGP +from botorch.optim import optimize_acqf +from gpytorch.kernels import MaternKernel +from botorch.fit import fit_gpytorch_mll +from gpytorch.mlls import ExactMarginalLogLikelihood + +from botorch.acquisition import UpperConfidenceBound, ExpectedImprovement, ProbabilityOfImprovement + +import warnings +from botorch.exceptions.warnings import InputDataWarning, BadInitialCandidatesWarning + +from active_bo_ros.PolicyModel.GaussianRBFModel import GaussianRBF + +torch.set_default_dtype(torch.float64) +warnings.filterwarnings("ignore", category=InputDataWarning) +warnings.filterwarnings("ignore", category=BadInitialCandidatesWarning) + + +class BayesianOptimization: + def __init__(self, env, nr_steps, nr_init=5, acq="Expected Improvement", nr_weights=6, policy_seed=None): + self.env = env + self.nr_init = nr_init + self.acq = acq + + self.X = None + self.X_np = None + self.Y_np = None + self.GP = None + + self.episode = 0 + self.counter_array = np.empty((1, 1)) + self.best_reward = np.empty((1, 1)) + self.distance_penalty = 0 + + self.nr_policy_weights = nr_weights + self.nr_steps = nr_steps + self.policy_seed = policy_seed + + self.lower_bound = 0 + self.upper_bound = 1.0 + + self.bounds = torch.t(torch.tensor([[self.lower_bound, self.upper_bound]] * self.nr_policy_weights)) + + self.policy_model = GaussianRBF(self.nr_policy_weights, + self.nr_steps, + self.policy_seed, + self.lower_bound, + self.upper_bound) + + self.eval_X = 200 + self.eval_restarts = 5 + + def reset_bo(self): + self.counter_array = np.empty((1, 1)) + self.GP = None + self.episode = 0 + self.best_reward = np.empty((1, 1)) + + def runner(self, policy): + env_reward = 0.0 + step_count = 0 + + for i in range(len(policy)): + action = policy[i] + output = self.env.step(action) + + env_reward += output[1] + done = output[2] + step_count += 1 + + if done: + self.counter_array = np.vstack((self.counter_array, step_count)) + break + + if not done and i == len(policy): + distance = -(self.env.goal_position - output[0][0]) + env_reward += distance * self.distance_penalty + self.counter_array = np.vstack((self.counter_array, step_count)) + + self.env.reset() + return env_reward, step_count + + def initialize(self): + self.env.reset() + self.reset_bo() + + self.X = torch.zeros((self.nr_init, self.nr_policy_weights)) + self.X_np = np.zeros((self.nr_init, self.nr_policy_weights)) + self.Y_np = np.zeros((self.nr_init, 1)) + + for i in range(self.nr_init): + self.policy_model.random_policy() + self.X_np[i, :] = self.policy_model.weights.T.clip(min=-1.0, max=1.0) + self.X[i, :] = torch.tensor((self.policy_model.weights.T.clip(min=-1.0, max=1.0) + 1) / 2) + policy = self.policy_model.rollout() + + reward, step_count = self.runner(policy) + + self.Y_np[i] = reward + + Y = torch.tensor(self.Y_np) + + self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=MaternKernel(nu=1.5)) + mll = ExactMarginalLogLikelihood(self.GP.likelihood, self.GP) + fit_gpytorch_mll(mll) + + def next_observation(self): + if self.acq == "Expected Improvement": + ei = ExpectedImprovement(self.GP, best_f=self.best_reward[-1][0], maximize=True) + x_next, _ = optimize_acqf(ei, + bounds=self.bounds, + num_restarts=self.eval_restarts, + raw_samples=self.eval_X, + q=1) + + elif self.acq == "Probability of Improvement": + poi = ProbabilityOfImprovement(self.GP, best_f=self.best_reward[-1][0], maximize=True) + x_next, _ = optimize_acqf(poi, + bounds=self.bounds, + num_restarts=self.eval_restarts, + raw_samples=self.eval_X, + q=1) + + elif self.acq == "Upper Confidence Bound": + ucb = UpperConfidenceBound(self.GP, beta=2.576, maximize=True) + x_next, _ = optimize_acqf(ucb, + bounds=self.bounds, + num_restarts=self.eval_restarts, + raw_samples=self.eval_X, + q=1) + + else: + raise NotImplementedError + + return torch.t(x_next) + + def eval_new_observation(self, x_next): + new_weight = x_next.detach().numpy() * 2 - 1 + self.policy_model.weights = new_weight + policy = self.policy_model.rollout() + + reward, step_count = self.runner(policy) + + self.X_np = np.vstack((self.X_np, new_weight.reshape(1, -1))) + self.X = torch.vstack((self.X, x_next.reshape(1, -1))) + self.Y_np = np.vstack((self.Y_np, reward)) + + Y = torch.tensor(self.Y_np) + + self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=MaternKernel(nu=1.5)) + mll = ExactMarginalLogLikelihood(self.GP.likelihood, self.GP) + fit_gpytorch_mll(mll) + + if self.episode == 0: + self.best_reward[0] = max(self.Y_np) + else: + self.best_reward = np.vstack((self.best_reward, max(self.Y_np))) + + self.episode += 1 + return step_count + + def add_new_observation(self, reward, x_new): + self.X = torch.vstack((self.X, torch.tensor(x_new))) + self.Y_np = np.vstack((self.Y_np, reward)) + + if self.episode == 0: + self.best_reward[0] = max(self.Y_np) + else: + self.best_reward = np.vstack((self.best_reward, max(self.Y_np))) + + self.episode += 1 + + def get_best_result(self): + Y = torch.tensor(self.Y_np) + self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=MaternKernel(nu=1.5)) + mll = ExactMarginalLogLikelihood(self.GP.likelihood, self.GP) + fit_gpytorch_mll(mll) + + y_hat = self.GP.posterior(self.X) + idx = torch.argmax(y_hat.mean) + x_max = self.X_np[idx] + + self.policy_model.weights = x_max + best_policy = self.policy_model.rollout().reshape(-1, ) + + return best_policy, y_hat.mean[idx].detach().numpy(), x_max diff --git a/src/active_bo_ros/active_bo_ros/bo_torch_service.py b/src/active_bo_ros/active_bo_ros/bo_torch_service.py new file mode 100644 index 0000000..b38e693 --- /dev/null +++ b/src/active_bo_ros/active_bo_ros/bo_torch_service.py @@ -0,0 +1,72 @@ +from active_bo_msgs.srv import BO + +import rclpy +from rclpy.node import Node + +from active_bo_ros.BayesianOptimization.BayesianOptimizationTorch import BayesianOptimization +from active_bo_ros.ReinforcementLearning.ContinuousMountainCar import Continuous_MountainCarEnv + +import numpy as np +import torch + + +class BOService(Node): + def __init__(self): + super().__init__('bo_service') + self.srv = self.create_service(BO, 'bo_srv', self.bo_callback) + + self.env = Continuous_MountainCarEnv() + self.distance_penalty = 0 + + self.nr_init = 3 + + def bo_callback(self, request, response): + self.get_logger().info('Bayesian Optimization Service started!') + nr_weights = request.nr_weights + max_steps = request.max_steps + nr_episodes = request.nr_episodes + nr_runs = request.nr_runs + acq = request.acquisition_function + self.get_logger().info(acq) + + reward = np.zeros((nr_episodes, nr_runs)) + best_pol_reward = np.zeros((1, nr_runs)) + best_policy = np.zeros((max_steps, nr_runs)) + best_weights = np.zeros((nr_weights, nr_runs)) + + BO = BayesianOptimization(self.env, + max_steps, + nr_init=self.nr_init, + acq=acq, + nr_weights=nr_weights) + + for i in range(nr_runs): + BO.initialize() + + for j in range(nr_episodes): + x_next = BO.next_observation() + BO.eval_new_observation(x_next) + + best_policy[:, i], best_pol_reward[:, i], best_weights[:, i] = BO.get_best_result() + + reward[:, i] = BO.best_reward.T + + response.reward_mean = np.mean(reward, axis=1).tolist() + response.reward_std = np.std(reward, axis=1).tolist() + + best_policy_idx = np.argmax(best_pol_reward) + response.best_weights = best_weights[:, best_policy_idx].tolist() + response.best_policy = best_policy[:, best_policy_idx].tolist() + return response + + +def main(args=None): + rclpy.init(args=args) + + bo_service = BOService() + + rclpy.spin(bo_service) + + +if __name__ == '__main__': + main() diff --git a/src/active_bo_ros/launch/bo_torch_service.launch.py b/src/active_bo_ros/launch/bo_torch_service.launch.py new file mode 100755 index 0000000..8a9d2ad --- /dev/null +++ b/src/active_bo_ros/launch/bo_torch_service.launch.py @@ -0,0 +1,12 @@ +from launch import LaunchDescription +from launch_ros.actions import Node + + +def generate_launch_description(): + return LaunchDescription([ + Node( + package='active_bo_ros', + executable='bo_torch_srv', + name='bo_srv' + ), + ]) \ No newline at end of file diff --git a/src/active_bo_ros/setup.py b/src/active_bo_ros/setup.py index a3336b2..467d95b 100644 --- a/src/active_bo_ros/setup.py +++ b/src/active_bo_ros/setup.py @@ -30,6 +30,7 @@ setup( 'policy_srv = active_bo_ros.policy_service:main', 'rl_srv = active_bo_ros.rl_service:main', 'bo_srv = active_bo_ros.bo_service:main', + 'bo_torch_srv = active_bo_ros.bo_torch_service:main', 'active_bo_srv = active_bo_ros.active_bo_service:main', 'active_rl_srv = active_bo_ros.active_rl_service:main', 'active_bo_topic = active_bo_ros.active_bo_topic:main',