Franka experiment complete + debugged

This commit is contained in:
Niko Feith 2023-09-15 17:35:52 +02:00
parent 54c196715e
commit 739bc5e160
4 changed files with 96 additions and 37 deletions

View File

@ -3,33 +3,46 @@ from scipy.stats import norm, multivariate_normal
class PreferenceExpectedImprovement: class PreferenceExpectedImprovement:
def __init__(self, nr_dims, nr_samples, lower_bound, upper_bound, initial_variance, update_variance, seed=None): def __init__(self, nr_dims, nr_samples, initial_variance, update_variance, lower_bound=None, upper_bound=None, seed=None,
fixed_dims=None):
self.nr_dims = nr_dims self.nr_dims = nr_dims
self.nr_samples = int(nr_samples) self.nr_samples = int(nr_samples)
self.lower_bound = lower_bound
self.upper_bound = upper_bound
self.init_var = initial_variance self.init_var = initial_variance
self.update_var = update_variance self.update_var = update_variance
if lower_bound is None:
self.lower_bound = [-1.] * nr_dims
else:
self.lower_bound = lower_bound
if upper_bound is None:
self.upper_bound = [1.] * nr_dims
else:
self.upper_bound = upper_bound
self.rng = np.random.default_rng(seed=seed) self.rng = np.random.default_rng(seed=seed)
# initial proposal distribution # initial proposal distribution
self.proposal_mean = np.zeros((nr_dims, 1)) self.proposal_mean = np.zeros((nr_dims, 1))
self.proposal_cov = np.diag(np.ones((nr_dims,)) * self.init_var) self.proposal_cov = np.diag(np.ones((nr_dims,)) * self.init_var)
# fixed dimension for robot experiment
self.fixed_dims = fixed_dims
def rejection_sampling(self): def rejection_sampling(self):
samples = np.empty((0, self.nr_dims)) samples = np.empty((0, self.nr_dims))
while samples.shape[0] < self.nr_samples: while samples.shape[0] < self.nr_samples:
# sample from the multi variate gaussian distribution # sample from the multi variate gaussian distribution
sample = np.zeros((1, self.nr_dims)) sample = np.zeros((1, self.nr_dims))
for i in range(self.nr_dims): for i in range(self.nr_dims):
if i in self.fixed_dims:
sample[0, i] = self.fixed_dims[i]
else:
check = False check = False
while not check: while not check:
sample[0, i] = self.rng.normal(self.proposal_mean[i], self.proposal_cov[i, i]) sample[0, i] = self.rng.normal(self.proposal_mean[i], self.proposal_cov[i, i])
if self.lower_bound <= sample[0, i] <= self.upper_bound: if self.lower_bound[i] <= sample[0, i] <= self.upper_bound[i]:
check = True check = True
samples = np.append(samples, sample, axis=0) samples = np.append(samples, sample, axis=0)

View File

@ -16,7 +16,8 @@ warnings.filterwarnings('ignore', category=ConvergenceWarning)
class BayesianOptimization: class BayesianOptimization:
def __init__(self, nr_steps, nr_dims, nr_weights, acq='ei', seed=None): def __init__(self, nr_steps, nr_dims, nr_weights, acq='ei', seed=None,
fixed_dims=None, lower_bound=None, upper_bound=None):
self.acq = acq self.acq = acq
self.episode = 0 self.episode = 0
@ -25,16 +26,34 @@ class BayesianOptimization:
self.nr_weights = nr_weights self.nr_weights = nr_weights
self.weights = self.nr_weights * self.nr_dims self.weights = self.nr_weights * self.nr_dims
self.lower_bound = -1.0 if lower_bound is None:
self.upper_bound = 1.0 self.lower_bound = [-1.]*nr_dims
self.lower_bound_1D = [-1.] * self.weights
else:
self.lower_bound = lower_bound
self.lower_bound_1D = [x for x in lower_bound for _ in range(self.nr_weights)]
if upper_bound is None:
self.upper_bound = [1.]*nr_dims
self.upper_bound_1D = [1.] * self.weights
else:
self.upper_bound = upper_bound
self.upper_bound_1D = [x for x in upper_bound for _ in range(self.nr_weights)]
self.seed = seed self.seed = seed
self.X = None self.X = None
self.Y = None self.Y = None
self.gp = None self.gp = None
self.fixed_dims = fixed_dims
if fixed_dims is not None:
self.fixed_dims_1D = self.convert_fixed_dims(fixed_dims, nr_weights)
self.policy_model = GaussianRBF(self.nr_steps, self.nr_weights, self.nr_dims, self.policy_model = GaussianRBF(self.nr_steps, self.nr_weights, self.nr_dims,
lowerb=self.lower_bound, upperb=self.upper_bound, seed=seed) lower_bound=self.lower_bound, upper_bound=self.upper_bound, seed=seed,
fixed_dims=fixed_dims)
self.acq_sample_size = 100 self.acq_sample_size = 100
@ -43,16 +62,26 @@ class BayesianOptimization:
if acq == "Preference Expected Improvement": if acq == "Preference Expected Improvement":
self.acq_fun = PreferenceExpectedImprovement(self.weights, self.acq_fun = PreferenceExpectedImprovement(self.weights,
self.acq_sample_size, self.acq_sample_size,
self.lower_bound,
self.upper_bound,
initial_variance=10.0, initial_variance=10.0,
update_variance=0.05, update_variance=0.05,
seed=seed) seed=seed,
fixed_dims=self.fixed_dims_1D,
lower_bound=self.lower_bound_1D,
upper_bound=self.upper_bound_1D,)
self.reset_bo() self.reset_bo()
@staticmethod
def convert_fixed_dims(fixed_dims, nr_weights):
new_fixed_dims = {}
for dim, values in fixed_dims.items():
for idx, value in enumerate(values):
new_dim = dim + idx * nr_weights
new_fixed_dims[new_dim] = value
return new_fixed_dims
def reset_bo(self): def reset_bo(self):
self.gp = GaussianProcessRegressor(Matern(nu=1.5, ), n_restarts_optimizer=5) #length_scale=(1e-8, 1e5) self.gp = GaussianProcessRegressor(Matern(nu=1.5, ), n_restarts_optimizer=5) # length_scale=(1e-8, 1e5)
self.best_reward = np.empty((1, 1)) self.best_reward = np.empty((1, 1))
self.X = np.zeros((1, self.weights), dtype=np.float64) self.X = np.zeros((1, self.weights), dtype=np.float64)
self.Y = np.zeros((1, 1), dtype=np.float64) self.Y = np.zeros((1, 1), dtype=np.float64)
@ -117,4 +146,3 @@ class BayesianOptimization:
x_max = self.X[idx, :].reshape((self.nr_weights, self.nr_dims), order='F') x_max = self.X[idx, :].reshape((self.nr_weights, self.nr_dims), order='F')
return y_max, x_max, idx return y_max, x_max, idx

View File

@ -3,7 +3,7 @@ import numpy as np
class GaussianRBF: class GaussianRBF:
def __init__(self, nr_steps, nr_weights, nr_dims, lowerb=-1.0, upperb=1.0, seed=None): def __init__(self, nr_steps, nr_weights, nr_dims, lower_bound=None, upper_bound=None, seed=None, fixed_dims=None):
self.nr_weights = nr_weights self.nr_weights = nr_weights
self.nr_steps = nr_steps self.nr_steps = nr_steps
self.nr_dims = nr_dims self.nr_dims = nr_dims
@ -11,11 +11,20 @@ class GaussianRBF:
self.weights = None self.weights = None
self.trajectory = None self.trajectory = None
self.lowerb = lowerb if lower_bound is None:
self.upperb = upperb self.lower_bound = [-1.]*nr_dims
else:
self.lower_bound = lower_bound
if upper_bound is None:
self.upper_bound = [1.]*nr_dims
else:
self.upper_bound = upper_bound
self.rng = np.random.default_rng(seed=seed) self.rng = np.random.default_rng(seed=seed)
self.fixed_dims = fixed_dims
# initialize # initialize
self.mid_points = np.linspace(0, self.nr_steps, self.nr_weights) self.mid_points = np.linspace(0, self.nr_steps, self.nr_weights)
if nr_weights > 1: if nr_weights > 1:
@ -30,8 +39,13 @@ class GaussianRBF:
self.trajectory = np.zeros((self.nr_steps, self.nr_dims)) self.trajectory = np.zeros((self.nr_steps, self.nr_dims))
def random_weights(self): def random_weights(self):
for w in range(self.nr_weights):
# If dim exists in fixed_dims, set weights directly
if w in self.fixed_dims:
self.weights[w, :] = np.array(self.fixed_dims[w])
else:
for dim in range(self.nr_dims): for dim in range(self.nr_dims):
self.weights[:, dim] = self.rng.uniform(self.lowerb, self.upperb, self.nr_weights) self.weights[w, dim] = self.rng.uniform(self.lower_bound[dim], self.upper_bound[dim], 1)
return self.weights return self.weights
def rollout(self): def rollout(self):

View File

@ -101,6 +101,11 @@ class ActiveBOTopic(Node):
self.mainloop_callback, self.mainloop_callback,
callback_group=mainloop_callback_group) callback_group=mainloop_callback_group)
# Franka Experiment
self.fixed_dimension = {0: [0.5, -0.3]}
self.lower_bound = [0.3, -0.35]
self.upper_bound = [0.7, 0.35]
def reset_bo_request(self): def reset_bo_request(self):
self.bo_env = None self.bo_env = None
self.bo_metric = None self.bo_metric = None
@ -199,7 +204,10 @@ class ActiveBOTopic(Node):
self.BO = BayesianOptimization(self.bo_steps, self.BO = BayesianOptimization(self.bo_steps,
self.bo_nr_dims, self.bo_nr_dims,
self.bo_nr_weights, self.bo_nr_weights,
acq=self.bo_acq_fcn) acq=self.bo_acq_fcn,
fixed_dims=self.fixed_dimension,
lower_bound=self.lower_bound,
upper_bound=self.upper_bound)
self.BO.reset_bo() self.BO.reset_bo()
@ -209,7 +217,7 @@ class ActiveBOTopic(Node):
if self.init_pending: if self.init_pending:
rl_msg = ActiveRLRequest() rl_msg = ActiveRLRequest()
rl_msg.interactive_run = 2 rl_msg.interactive_run = 1
rl_msg.weights = self.BO.policy_model.random_weights().flatten('F').tolist() rl_msg.weights = self.BO.policy_model.random_weights().flatten('F').tolist()
rl_msg.policy = self.BO.policy_model.rollout().flatten('F').tolist() rl_msg.policy = self.BO.policy_model.rollout().flatten('F').tolist()
rl_msg.nr_weights = self.bo_nr_weights rl_msg.nr_weights = self.bo_nr_weights
@ -237,6 +245,8 @@ class ActiveBOTopic(Node):
env = 're' env = 're'
elif self.bo_env == "Finger": elif self.bo_env == "Finger":
env = 'fin' env = 'fin'
elif self.bo_env == "Franka":
env = 'franka'
else: else:
raise NotImplementedError raise NotImplementedError
@ -264,9 +274,9 @@ class ActiveBOTopic(Node):
np.savetxt(path, data, delimiter=',') np.savetxt(path, data, delimiter=',')
rl_msg = ActiveRLRequest() rl_msg = ActiveRLRequest()
rl_msg.interactive_run = 1 rl_msg.interactive_run = 2
rl_msg.weights = self.BO.policy_model.random_weights().flatten('F').tolist() rl_msg.weights = self.best_policy[best_policy_idx, :, :].flatten('F').tolist()
rl_msg.policy = self.BO.policy_model.rollout().flatten('F').tolist() rl_msg.weights = self.best_weights[best_policy_idx, :, :].flatten('F').tolist()
rl_msg.nr_weights = self.bo_nr_weights rl_msg.nr_weights = self.bo_nr_weights
rl_msg.nr_steps = self.bo_steps rl_msg.nr_steps = self.bo_steps
rl_msg.nr_dims = self.bo_nr_dims rl_msg.nr_dims = self.bo_nr_dims
@ -275,8 +285,6 @@ class ActiveBOTopic(Node):
self.rl_pending = True self.rl_pending = True
self.active_rl_pub.publish(rl_msg)
self.get_logger().info('Responding: Active BO') self.get_logger().info('Responding: Active BO')
self.active_bo_pub.publish(bo_response) self.active_bo_pub.publish(bo_response)
self.reset_bo_request() self.reset_bo_request()
@ -319,16 +327,12 @@ class ActiveBOTopic(Node):
rl_msg = ActiveRLRequest() rl_msg = ActiveRLRequest()
rl_msg.interactive_run = 0 rl_msg.interactive_run = 0
rl_msg.weights = self.BO.policy_model.random_weights().flatten('F').tolist()
rl_msg.policy = self.BO.policy_model.rollout().flatten('F').tolist() rl_msg.policy = self.BO.policy_model.rollout().flatten('F').tolist()
rl_msg.weights = old_weights.flatten('F').tolist()
rl_msg.nr_weights = self.bo_nr_weights rl_msg.nr_weights = self.bo_nr_weights
rl_msg.nr_steps = self.bo_steps rl_msg.nr_steps = self.bo_steps
rl_msg.nr_dims = self.bo_nr_dims rl_msg.nr_dims = self.bo_nr_dims
self.active_rl_pub.publish(rl_msg)
self.rl_pending = True
self.active_rl_pub.publish(rl_msg) self.active_rl_pub.publish(rl_msg)
self.rl_pending = True self.rl_pending = True
@ -338,8 +342,8 @@ class ActiveBOTopic(Node):
rl_msg = ActiveRLRequest() rl_msg = ActiveRLRequest()
rl_msg.interactive_run = 1 rl_msg.interactive_run = 1
rl_msg.weights = self.BO.policy_model.random_weights().flatten('F').tolist()
rl_msg.policy = self.BO.policy_model.rollout().flatten('F').tolist() rl_msg.policy = self.BO.policy_model.rollout().flatten('F').tolist()
rl_msg.weights = x_next.flatten('F').tolist()
rl_msg.nr_weights = self.bo_nr_weights rl_msg.nr_weights = self.bo_nr_weights
rl_msg.nr_steps = self.bo_steps rl_msg.nr_steps = self.bo_steps
rl_msg.nr_dims = self.bo_nr_dims rl_msg.nr_dims = self.bo_nr_dims