From 739bc5e160278f55b04e158ca94dc97b46d79755 Mon Sep 17 00:00:00 2001
From: Niko
Date: Fri, 15 Sep 2023 17:35:52 +0200
Subject: [PATCH] Franka experiment complete + debugged

---
 .../PreferenceExpectedImprovement.py          | 33 +++++++++----
 .../BayesianOptimization/BO2D.py              | 46 +++++++++++++++----
 .../PolicyModel/GaussianModelMultiDim.py      | 24 ++++++++--
 .../active_bo_ros/interactive_bo_robot.py     | 30 ++++++------
 4 files changed, 96 insertions(+), 37 deletions(-)

diff --git a/src/active_bo_ros/active_bo_ros/AcquisitionFunctions/PreferenceExpectedImprovement.py b/src/active_bo_ros/active_bo_ros/AcquisitionFunctions/PreferenceExpectedImprovement.py
index f0b44db..a78d37e 100644
--- a/src/active_bo_ros/active_bo_ros/AcquisitionFunctions/PreferenceExpectedImprovement.py
+++ b/src/active_bo_ros/active_bo_ros/AcquisitionFunctions/PreferenceExpectedImprovement.py
@@ -3,34 +3,47 @@ from scipy.stats import norm, multivariate_normal
 
 
 class PreferenceExpectedImprovement:
-    def __init__(self, nr_dims, nr_samples, lower_bound, upper_bound, initial_variance, update_variance, seed=None):
+    def __init__(self, nr_dims, nr_samples, initial_variance, update_variance,
+                 lower_bound=None, upper_bound=None, seed=None, fixed_dims=None):
         self.nr_dims = nr_dims
         self.nr_samples = int(nr_samples)
 
-        self.lower_bound = lower_bound
-        self.upper_bound = upper_bound
-
         self.init_var = initial_variance
         self.update_var = update_variance
 
+        if lower_bound is None:
+            self.lower_bound = [-1.] * nr_dims
+        else:
+            self.lower_bound = lower_bound
+
+        if upper_bound is None:
+            self.upper_bound = [1.] * nr_dims
+        else:
+            self.upper_bound = upper_bound
+
         self.rng = np.random.default_rng(seed=seed)
 
         # initial proposal distribution
         self.proposal_mean = np.zeros((nr_dims, 1))
         self.proposal_cov = np.diag(np.ones((nr_dims,)) * self.init_var)
 
+        # fixed dimensions for the robot experiment; normalize None to an
+        # empty dict so the membership test in rejection_sampling() is safe
+        self.fixed_dims = fixed_dims if fixed_dims is not None else {}
+
     def rejection_sampling(self):
         samples = np.empty((0, self.nr_dims))
         while samples.shape[0] < self.nr_samples:
             # sample from the multi variate gaussian distribution
             sample = np.zeros((1, self.nr_dims))
             for i in range(self.nr_dims):
-                check = False
-                while not check:
-                    sample[0, i] = self.rng.normal(self.proposal_mean[i], self.proposal_cov[i, i])
-                    if self.lower_bound <= sample[0, i] <= self.upper_bound:
-                        check = True
+                if i in self.fixed_dims:
+                    sample[0, i] = self.fixed_dims[i]
+                else:
+                    check = False
+                    while not check:
+                        sample[0, i] = self.rng.normal(self.proposal_mean[i], self.proposal_cov[i, i])
+                        if self.lower_bound[i] <= sample[0, i] <= self.upper_bound[i]:
+                            check = True
 
             samples = np.append(samples, sample, axis=0)
 
diff --git a/src/active_bo_ros/active_bo_ros/BayesianOptimization/BO2D.py b/src/active_bo_ros/active_bo_ros/BayesianOptimization/BO2D.py
index 298d206..be77ad9 100644
--- a/src/active_bo_ros/active_bo_ros/BayesianOptimization/BO2D.py
+++ b/src/active_bo_ros/active_bo_ros/BayesianOptimization/BO2D.py
@@ -16,7 +16,8 @@ warnings.filterwarnings('ignore', category=ConvergenceWarning)
 
 
 class BayesianOptimization:
-    def __init__(self, nr_steps, nr_dims, nr_weights, acq='ei', seed=None):
+    def __init__(self, nr_steps, nr_dims, nr_weights, acq='ei', seed=None,
+                 fixed_dims=None, lower_bound=None, upper_bound=None):
         self.acq = acq
 
         self.episode = 0
@@ -25,16 +26,34 @@ class BayesianOptimization:
         self.nr_weights = nr_weights
         self.weights = self.nr_weights * self.nr_dims
 
-        self.lower_bound = -1.0
-        self.upper_bound = 1.0
+        if lower_bound is None:
+            self.lower_bound = [-1.] * nr_dims
+            self.lower_bound_1D = [-1.] * self.weights
+        else:
+            self.lower_bound = lower_bound
+            # repeat each per-dimension bound nr_weights times to match the
+            # Fortran-order flattening of the (nr_weights, nr_dims) matrix
+            self.lower_bound_1D = [x for x in lower_bound for _ in range(self.nr_weights)]
+
+        if upper_bound is None:
+            self.upper_bound = [1.] * nr_dims
+            self.upper_bound_1D = [1.] * self.weights
+        else:
+            self.upper_bound = upper_bound
+            self.upper_bound_1D = [x for x in upper_bound for _ in range(self.nr_weights)]
+
         self.seed = seed
 
         self.X = None
         self.Y = None
         self.gp = None
 
+        self.fixed_dims = fixed_dims
+
+        if fixed_dims is not None:
+            self.fixed_dims_1D = self.convert_fixed_dims(fixed_dims, nr_weights)
+        else:
+            self.fixed_dims_1D = None
+
         self.policy_model = GaussianRBF(self.nr_steps, self.nr_weights, self.nr_dims,
-                                        lowerb=self.lower_bound, upperb=self.upper_bound, seed=seed)
+                                        lower_bound=self.lower_bound, upper_bound=self.upper_bound, seed=seed,
+                                        fixed_dims=fixed_dims)
 
         self.acq_sample_size = 100
 
@@ -43,16 +62,26 @@
         if acq == "Preference Expected Improvement":
             self.acq_fun = PreferenceExpectedImprovement(self.weights,
                                                          self.acq_sample_size,
-                                                         self.lower_bound,
-                                                         self.upper_bound,
                                                          initial_variance=10.0,
                                                          update_variance=0.05,
-                                                         seed=seed)
+                                                         seed=seed,
+                                                         fixed_dims=self.fixed_dims_1D,
+                                                         lower_bound=self.lower_bound_1D,
+                                                         upper_bound=self.upper_bound_1D)
 
         self.reset_bo()
 
+    @staticmethod
+    def convert_fixed_dims(fixed_dims, nr_weights):
+        # keys are weight indices, values hold one entry per dimension;
+        # with Fortran-order flattening of the (nr_weights, nr_dims) matrix,
+        # weight w of dimension d lands at flat index w + d * nr_weights
+        new_fixed_dims = {}
+        for w, values in fixed_dims.items():
+            for dim, value in enumerate(values):
+                new_idx = w + dim * nr_weights
+                new_fixed_dims[new_idx] = value
+        return new_fixed_dims
+
     def reset_bo(self):
-        self.gp = GaussianProcessRegressor(Matern(nu=1.5, ), n_restarts_optimizer=5) #length_scale=(1e-8, 1e5)
+        self.gp = GaussianProcessRegressor(Matern(nu=1.5), n_restarts_optimizer=5)  # length_scale=(1e-8, 1e5)
         self.best_reward = np.empty((1, 1))
         self.X = np.zeros((1, self.weights), dtype=np.float64)
         self.Y = np.zeros((1, 1), dtype=np.float64)
@@ -117,4 +146,3 @@
         x_max = self.X[idx, :].reshape((self.nr_weights, self.nr_dims), order='F')
 
         return y_max, x_max, idx
-
diff --git a/src/active_bo_ros/active_bo_ros/PolicyModel/GaussianModelMultiDim.py b/src/active_bo_ros/active_bo_ros/PolicyModel/GaussianModelMultiDim.py
index 4b97d1e..15ba4f7 100644
--- a/src/active_bo_ros/active_bo_ros/PolicyModel/GaussianModelMultiDim.py
+++ b/src/active_bo_ros/active_bo_ros/PolicyModel/GaussianModelMultiDim.py
@@ -3,7 +3,7 @@ import numpy as np
 
 
 class GaussianRBF:
-    def __init__(self, nr_steps, nr_weights, nr_dims, lowerb=-1.0, upperb=1.0, seed=None):
+    def __init__(self, nr_steps, nr_weights, nr_dims, lower_bound=None, upper_bound=None, seed=None, fixed_dims=None):
         self.nr_weights = nr_weights
         self.nr_steps = nr_steps
         self.nr_dims = nr_dims
@@ -11,11 +11,20 @@
         self.weights = None
         self.trajectory = None
 
-        self.lowerb = lowerb
-        self.upperb = upperb
+        if lower_bound is None:
+            self.lower_bound = [-1.] * nr_dims
+        else:
+            self.lower_bound = lower_bound
+
+        if upper_bound is None:
+            self.upper_bound = [1.] * nr_dims
+        else:
+            self.upper_bound = upper_bound
 
         self.rng = np.random.default_rng(seed=seed)
 
+        # weight indices with fixed values; empty dict when nothing is fixed
+        self.fixed_dims = fixed_dims if fixed_dims is not None else {}
+
         # initialize
         self.mid_points = np.linspace(0, self.nr_steps, self.nr_weights)
         if nr_weights > 1:
@@ -30,8 +39,13 @@
         self.trajectory = np.zeros((self.nr_steps, self.nr_dims))
 
     def random_weights(self):
-        for dim in range(self.nr_dims):
-            self.weights[:, dim] = self.rng.uniform(self.lowerb, self.upperb, self.nr_weights)
+        for w in range(self.nr_weights):
+            # if this weight index is fixed, set its values directly
+            if w in self.fixed_dims:
+                self.weights[w, :] = np.array(self.fixed_dims[w])
+            else:
+                for dim in range(self.nr_dims):
+                    self.weights[w, dim] = self.rng.uniform(self.lower_bound[dim], self.upper_bound[dim])
         return self.weights
 
     def rollout(self):
diff --git a/src/active_bo_ros/active_bo_ros/interactive_bo_robot.py b/src/active_bo_ros/active_bo_ros/interactive_bo_robot.py
index a248e8d..f7ef22f 100644
--- a/src/active_bo_ros/active_bo_ros/interactive_bo_robot.py
+++ b/src/active_bo_ros/active_bo_ros/interactive_bo_robot.py
@@ -101,6 +101,11 @@
                                                self.mainloop_callback,
                                                callback_group=mainloop_callback_group)
 
+        # Franka experiment: fix weight 0 and bound the workspace
+        self.fixed_dimension = {0: [0.5, -0.3]}
+        self.lower_bound = [0.3, -0.35]
+        self.upper_bound = [0.7, 0.35]
+
     def reset_bo_request(self):
         self.bo_env = None
         self.bo_metric = None
@@ -199,7 +204,10 @@
             self.BO = BayesianOptimization(self.bo_steps,
                                            self.bo_nr_dims,
                                            self.bo_nr_weights,
-                                           acq=self.bo_acq_fcn)
+                                           acq=self.bo_acq_fcn,
+                                           fixed_dims=self.fixed_dimension,
+                                           lower_bound=self.lower_bound,
+                                           upper_bound=self.upper_bound)
 
             self.BO.reset_bo()
 
@@ -209,7 +217,7 @@
 
         if self.init_pending:
             rl_msg = ActiveRLRequest()
-            rl_msg.interactive_run = 2
+            rl_msg.interactive_run = 1
             rl_msg.weights = self.BO.policy_model.random_weights().flatten('F').tolist()
             rl_msg.policy = self.BO.policy_model.rollout().flatten('F').tolist()
             rl_msg.nr_weights = self.bo_nr_weights
@@ -237,6 +245,8 @@
                     env = 're'
                 elif self.bo_env == "Finger":
                     env = 'fin'
+                elif self.bo_env == "Franka":
+                    env = 'franka'
                 else:
                     raise NotImplementedError
 
@@ -264,9 +274,9 @@
                 np.savetxt(path, data, delimiter=',')
 
                 rl_msg = ActiveRLRequest()
-                rl_msg.interactive_run = 1
-                rl_msg.weights = self.BO.policy_model.random_weights().flatten('F').tolist()
-                rl_msg.policy = self.BO.policy_model.rollout().flatten('F').tolist()
+                rl_msg.interactive_run = 2
+                rl_msg.policy = self.best_policy[best_policy_idx, :, :].flatten('F').tolist()
+                rl_msg.weights = self.best_weights[best_policy_idx, :, :].flatten('F').tolist()
                 rl_msg.nr_weights = self.bo_nr_weights
                 rl_msg.nr_steps = self.bo_steps
                 rl_msg.nr_dims = self.bo_nr_dims
@@ -275,8 +285,6 @@
 
                 self.rl_pending = True
 
-                self.active_rl_pub.publish(rl_msg)
-
                 self.get_logger().info('Responding: Active BO')
                 self.active_bo_pub.publish(bo_response)
                 self.reset_bo_request()
@@ -319,16 +327,12 @@
 
                 rl_msg = ActiveRLRequest()
                 rl_msg.interactive_run = 0
-                rl_msg.weights = self.BO.policy_model.random_weights().flatten('F').tolist()
                 rl_msg.policy = self.BO.policy_model.rollout().flatten('F').tolist()
+                rl_msg.weights = old_weights.flatten('F').tolist()
                 rl_msg.nr_weights = self.bo_nr_weights
                 rl_msg.nr_steps = self.bo_steps
                 rl_msg.nr_dims = self.bo_nr_dims
 
-                self.active_rl_pub.publish(rl_msg)
-
-                self.rl_pending = True
-
                 self.active_rl_pub.publish(rl_msg)
 
                 self.rl_pending = True
@@ -338,8 +342,8 @@
 
                 rl_msg = ActiveRLRequest()
                 rl_msg.interactive_run = 1
-                rl_msg.weights = self.BO.policy_model.random_weights().flatten('F').tolist()
                 rl_msg.policy = self.BO.policy_model.rollout().flatten('F').tolist()
+                rl_msg.weights = x_next.flatten('F').tolist()
                 rl_msg.nr_weights = self.bo_nr_weights
                 rl_msg.nr_steps = self.bo_steps
                 rl_msg.nr_dims = self.bo_nr_dims
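
Note on the index mapping (not part of the patch): convert_fixed_dims mirrors
the Fortran-order flattening used for rl_msg.weights, where weight w of
dimension d lands at flat index w + d * nr_weights. A minimal standalone check,
assuming nr_weights=5 and nr_dims=2 purely for illustration (the fixed values
are the Franka config above):

    import numpy as np

    nr_weights, nr_dims = 5, 2      # illustrative sizes, not from the patch
    fixed_dims = {0: [0.5, -0.3]}   # weight 0 fixed in both dimensions

    # same mapping as BayesianOptimization.convert_fixed_dims
    flat = {w + d * nr_weights: v
            for w, values in fixed_dims.items()
            for d, v in enumerate(values)}
    print(flat)                     # {0: 0.5, 5: -0.3}

    # cross-check against numpy's 'F'-order flattening used throughout
    weights = np.zeros((nr_weights, nr_dims))
    weights[0, :] = fixed_dims[0]
    assert all(weights.flatten('F')[i] == v for i, v in flat.items())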
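A hypothetical end-to-end smoke test of the new keyword arguments (not part of
the patch; nr_steps=50, nr_weights=5, and seed=0 are placeholder values, and
the import path is inferred from the file layout above):

    from active_bo_ros.BayesianOptimization.BO2D import BayesianOptimization

    bo = BayesianOptimization(nr_steps=50, nr_dims=2, nr_weights=5,
                              acq="Preference Expected Improvement",
                              fixed_dims={0: [0.5, -0.3]},
                              lower_bound=[0.3, -0.35],
                              upper_bound=[0.7, 0.35],
                              seed=0)

    bo.policy_model.reset()   # assumes reset() allocates the weight matrix
    weights = bo.policy_model.random_weights()

    # weight 0 keeps the fixed Franka values on every draw;
    # the remaining weights stay inside the per-dimension bounds
    assert list(weights[0, :]) == [0.5, -0.3]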