Franka experiment complete + debugged
This commit is contained in:
parent
54c196715e
commit
739bc5e160
@ -3,33 +3,46 @@ from scipy.stats import norm, multivariate_normal
|
|||||||
|
|
||||||
|
|
||||||
class PreferenceExpectedImprovement:
|
class PreferenceExpectedImprovement:
|
||||||
def __init__(self, nr_dims, nr_samples, lower_bound, upper_bound, initial_variance, update_variance, seed=None):
|
def __init__(self, nr_dims, nr_samples, initial_variance, update_variance, lower_bound=None, upper_bound=None, seed=None,
|
||||||
|
fixed_dims=None):
|
||||||
|
|
||||||
self.nr_dims = nr_dims
|
self.nr_dims = nr_dims
|
||||||
self.nr_samples = int(nr_samples)
|
self.nr_samples = int(nr_samples)
|
||||||
|
|
||||||
self.lower_bound = lower_bound
|
|
||||||
self.upper_bound = upper_bound
|
|
||||||
|
|
||||||
self.init_var = initial_variance
|
self.init_var = initial_variance
|
||||||
self.update_var = update_variance
|
self.update_var = update_variance
|
||||||
|
|
||||||
|
if lower_bound is None:
|
||||||
|
self.lower_bound = [-1.] * nr_dims
|
||||||
|
else:
|
||||||
|
self.lower_bound = lower_bound
|
||||||
|
|
||||||
|
if upper_bound is None:
|
||||||
|
self.upper_bound = [1.] * nr_dims
|
||||||
|
else:
|
||||||
|
self.upper_bound = upper_bound
|
||||||
|
|
||||||
self.rng = np.random.default_rng(seed=seed)
|
self.rng = np.random.default_rng(seed=seed)
|
||||||
|
|
||||||
# initial proposal distribution
|
# initial proposal distribution
|
||||||
self.proposal_mean = np.zeros((nr_dims, 1))
|
self.proposal_mean = np.zeros((nr_dims, 1))
|
||||||
self.proposal_cov = np.diag(np.ones((nr_dims,)) * self.init_var)
|
self.proposal_cov = np.diag(np.ones((nr_dims,)) * self.init_var)
|
||||||
|
|
||||||
|
# fixed dimension for robot experiment
|
||||||
|
self.fixed_dims = fixed_dims
|
||||||
|
|
||||||
def rejection_sampling(self):
|
def rejection_sampling(self):
|
||||||
samples = np.empty((0, self.nr_dims))
|
samples = np.empty((0, self.nr_dims))
|
||||||
while samples.shape[0] < self.nr_samples:
|
while samples.shape[0] < self.nr_samples:
|
||||||
# sample from the multi variate gaussian distribution
|
# sample from the multi variate gaussian distribution
|
||||||
sample = np.zeros((1, self.nr_dims))
|
sample = np.zeros((1, self.nr_dims))
|
||||||
for i in range(self.nr_dims):
|
for i in range(self.nr_dims):
|
||||||
|
if i in self.fixed_dims:
|
||||||
|
sample[0, i] = self.fixed_dims[i]
|
||||||
|
else:
|
||||||
check = False
|
check = False
|
||||||
while not check:
|
while not check:
|
||||||
sample[0, i] = self.rng.normal(self.proposal_mean[i], self.proposal_cov[i, i])
|
sample[0, i] = self.rng.normal(self.proposal_mean[i], self.proposal_cov[i, i])
|
||||||
if self.lower_bound <= sample[0, i] <= self.upper_bound:
|
if self.lower_bound[i] <= sample[0, i] <= self.upper_bound[i]:
|
||||||
check = True
|
check = True
|
||||||
|
|
||||||
samples = np.append(samples, sample, axis=0)
|
samples = np.append(samples, sample, axis=0)
|
||||||
|
@ -16,7 +16,8 @@ warnings.filterwarnings('ignore', category=ConvergenceWarning)
|
|||||||
|
|
||||||
|
|
||||||
class BayesianOptimization:
|
class BayesianOptimization:
|
||||||
def __init__(self, nr_steps, nr_dims, nr_weights, acq='ei', seed=None):
|
def __init__(self, nr_steps, nr_dims, nr_weights, acq='ei', seed=None,
|
||||||
|
fixed_dims=None, lower_bound=None, upper_bound=None):
|
||||||
self.acq = acq
|
self.acq = acq
|
||||||
self.episode = 0
|
self.episode = 0
|
||||||
|
|
||||||
@ -25,16 +26,34 @@ class BayesianOptimization:
|
|||||||
self.nr_weights = nr_weights
|
self.nr_weights = nr_weights
|
||||||
self.weights = self.nr_weights * self.nr_dims
|
self.weights = self.nr_weights * self.nr_dims
|
||||||
|
|
||||||
self.lower_bound = -1.0
|
if lower_bound is None:
|
||||||
self.upper_bound = 1.0
|
self.lower_bound = [-1.]*nr_dims
|
||||||
|
self.lower_bound_1D = [-1.] * self.weights
|
||||||
|
else:
|
||||||
|
self.lower_bound = lower_bound
|
||||||
|
self.lower_bound_1D = [x for x in lower_bound for _ in range(self.nr_weights)]
|
||||||
|
|
||||||
|
if upper_bound is None:
|
||||||
|
self.upper_bound = [1.]*nr_dims
|
||||||
|
self.upper_bound_1D = [1.] * self.weights
|
||||||
|
else:
|
||||||
|
self.upper_bound = upper_bound
|
||||||
|
self.upper_bound_1D = [x for x in upper_bound for _ in range(self.nr_weights)]
|
||||||
|
|
||||||
self.seed = seed
|
self.seed = seed
|
||||||
|
|
||||||
self.X = None
|
self.X = None
|
||||||
self.Y = None
|
self.Y = None
|
||||||
self.gp = None
|
self.gp = None
|
||||||
|
|
||||||
|
self.fixed_dims = fixed_dims
|
||||||
|
|
||||||
|
if fixed_dims is not None:
|
||||||
|
self.fixed_dims_1D = self.convert_fixed_dims(fixed_dims, nr_weights)
|
||||||
|
|
||||||
self.policy_model = GaussianRBF(self.nr_steps, self.nr_weights, self.nr_dims,
|
self.policy_model = GaussianRBF(self.nr_steps, self.nr_weights, self.nr_dims,
|
||||||
lowerb=self.lower_bound, upperb=self.upper_bound, seed=seed)
|
lower_bound=self.lower_bound, upper_bound=self.upper_bound, seed=seed,
|
||||||
|
fixed_dims=fixed_dims)
|
||||||
|
|
||||||
self.acq_sample_size = 100
|
self.acq_sample_size = 100
|
||||||
|
|
||||||
@ -43,14 +62,24 @@ class BayesianOptimization:
|
|||||||
if acq == "Preference Expected Improvement":
|
if acq == "Preference Expected Improvement":
|
||||||
self.acq_fun = PreferenceExpectedImprovement(self.weights,
|
self.acq_fun = PreferenceExpectedImprovement(self.weights,
|
||||||
self.acq_sample_size,
|
self.acq_sample_size,
|
||||||
self.lower_bound,
|
|
||||||
self.upper_bound,
|
|
||||||
initial_variance=10.0,
|
initial_variance=10.0,
|
||||||
update_variance=0.05,
|
update_variance=0.05,
|
||||||
seed=seed)
|
seed=seed,
|
||||||
|
fixed_dims=self.fixed_dims_1D,
|
||||||
|
lower_bound=self.lower_bound_1D,
|
||||||
|
upper_bound=self.upper_bound_1D,)
|
||||||
|
|
||||||
self.reset_bo()
|
self.reset_bo()
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def convert_fixed_dims(fixed_dims, nr_weights):
|
||||||
|
new_fixed_dims = {}
|
||||||
|
for dim, values in fixed_dims.items():
|
||||||
|
for idx, value in enumerate(values):
|
||||||
|
new_dim = dim + idx * nr_weights
|
||||||
|
new_fixed_dims[new_dim] = value
|
||||||
|
return new_fixed_dims
|
||||||
|
|
||||||
def reset_bo(self):
|
def reset_bo(self):
|
||||||
self.gp = GaussianProcessRegressor(Matern(nu=1.5, ), n_restarts_optimizer=5) # length_scale=(1e-8, 1e5)
|
self.gp = GaussianProcessRegressor(Matern(nu=1.5, ), n_restarts_optimizer=5) # length_scale=(1e-8, 1e5)
|
||||||
self.best_reward = np.empty((1, 1))
|
self.best_reward = np.empty((1, 1))
|
||||||
@ -117,4 +146,3 @@ class BayesianOptimization:
|
|||||||
x_max = self.X[idx, :].reshape((self.nr_weights, self.nr_dims), order='F')
|
x_max = self.X[idx, :].reshape((self.nr_weights, self.nr_dims), order='F')
|
||||||
|
|
||||||
return y_max, x_max, idx
|
return y_max, x_max, idx
|
||||||
|
|
||||||
|
@ -3,7 +3,7 @@ import numpy as np
|
|||||||
|
|
||||||
|
|
||||||
class GaussianRBF:
|
class GaussianRBF:
|
||||||
def __init__(self, nr_steps, nr_weights, nr_dims, lowerb=-1.0, upperb=1.0, seed=None):
|
def __init__(self, nr_steps, nr_weights, nr_dims, lower_bound=None, upper_bound=None, seed=None, fixed_dims=None):
|
||||||
self.nr_weights = nr_weights
|
self.nr_weights = nr_weights
|
||||||
self.nr_steps = nr_steps
|
self.nr_steps = nr_steps
|
||||||
self.nr_dims = nr_dims
|
self.nr_dims = nr_dims
|
||||||
@ -11,11 +11,20 @@ class GaussianRBF:
|
|||||||
self.weights = None
|
self.weights = None
|
||||||
self.trajectory = None
|
self.trajectory = None
|
||||||
|
|
||||||
self.lowerb = lowerb
|
if lower_bound is None:
|
||||||
self.upperb = upperb
|
self.lower_bound = [-1.]*nr_dims
|
||||||
|
else:
|
||||||
|
self.lower_bound = lower_bound
|
||||||
|
|
||||||
|
if upper_bound is None:
|
||||||
|
self.upper_bound = [1.]*nr_dims
|
||||||
|
else:
|
||||||
|
self.upper_bound = upper_bound
|
||||||
|
|
||||||
self.rng = np.random.default_rng(seed=seed)
|
self.rng = np.random.default_rng(seed=seed)
|
||||||
|
|
||||||
|
self.fixed_dims = fixed_dims
|
||||||
|
|
||||||
# initialize
|
# initialize
|
||||||
self.mid_points = np.linspace(0, self.nr_steps, self.nr_weights)
|
self.mid_points = np.linspace(0, self.nr_steps, self.nr_weights)
|
||||||
if nr_weights > 1:
|
if nr_weights > 1:
|
||||||
@ -30,8 +39,13 @@ class GaussianRBF:
|
|||||||
self.trajectory = np.zeros((self.nr_steps, self.nr_dims))
|
self.trajectory = np.zeros((self.nr_steps, self.nr_dims))
|
||||||
|
|
||||||
def random_weights(self):
|
def random_weights(self):
|
||||||
|
for w in range(self.nr_weights):
|
||||||
|
# If dim exists in fixed_dims, set weights directly
|
||||||
|
if w in self.fixed_dims:
|
||||||
|
self.weights[w, :] = np.array(self.fixed_dims[w])
|
||||||
|
else:
|
||||||
for dim in range(self.nr_dims):
|
for dim in range(self.nr_dims):
|
||||||
self.weights[:, dim] = self.rng.uniform(self.lowerb, self.upperb, self.nr_weights)
|
self.weights[w, dim] = self.rng.uniform(self.lower_bound[dim], self.upper_bound[dim], 1)
|
||||||
return self.weights
|
return self.weights
|
||||||
|
|
||||||
def rollout(self):
|
def rollout(self):
|
||||||
|
@ -101,6 +101,11 @@ class ActiveBOTopic(Node):
|
|||||||
self.mainloop_callback,
|
self.mainloop_callback,
|
||||||
callback_group=mainloop_callback_group)
|
callback_group=mainloop_callback_group)
|
||||||
|
|
||||||
|
# Franka Experiment
|
||||||
|
self.fixed_dimension = {0: [0.5, -0.3]}
|
||||||
|
self.lower_bound = [0.3, -0.35]
|
||||||
|
self.upper_bound = [0.7, 0.35]
|
||||||
|
|
||||||
def reset_bo_request(self):
|
def reset_bo_request(self):
|
||||||
self.bo_env = None
|
self.bo_env = None
|
||||||
self.bo_metric = None
|
self.bo_metric = None
|
||||||
@ -199,7 +204,10 @@ class ActiveBOTopic(Node):
|
|||||||
self.BO = BayesianOptimization(self.bo_steps,
|
self.BO = BayesianOptimization(self.bo_steps,
|
||||||
self.bo_nr_dims,
|
self.bo_nr_dims,
|
||||||
self.bo_nr_weights,
|
self.bo_nr_weights,
|
||||||
acq=self.bo_acq_fcn)
|
acq=self.bo_acq_fcn,
|
||||||
|
fixed_dims=self.fixed_dimension,
|
||||||
|
lower_bound=self.lower_bound,
|
||||||
|
upper_bound=self.upper_bound)
|
||||||
|
|
||||||
self.BO.reset_bo()
|
self.BO.reset_bo()
|
||||||
|
|
||||||
@ -209,7 +217,7 @@ class ActiveBOTopic(Node):
|
|||||||
if self.init_pending:
|
if self.init_pending:
|
||||||
|
|
||||||
rl_msg = ActiveRLRequest()
|
rl_msg = ActiveRLRequest()
|
||||||
rl_msg.interactive_run = 2
|
rl_msg.interactive_run = 1
|
||||||
rl_msg.weights = self.BO.policy_model.random_weights().flatten('F').tolist()
|
rl_msg.weights = self.BO.policy_model.random_weights().flatten('F').tolist()
|
||||||
rl_msg.policy = self.BO.policy_model.rollout().flatten('F').tolist()
|
rl_msg.policy = self.BO.policy_model.rollout().flatten('F').tolist()
|
||||||
rl_msg.nr_weights = self.bo_nr_weights
|
rl_msg.nr_weights = self.bo_nr_weights
|
||||||
@ -237,6 +245,8 @@ class ActiveBOTopic(Node):
|
|||||||
env = 're'
|
env = 're'
|
||||||
elif self.bo_env == "Finger":
|
elif self.bo_env == "Finger":
|
||||||
env = 'fin'
|
env = 'fin'
|
||||||
|
elif self.bo_env == "Franka":
|
||||||
|
env = 'franka'
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
@ -264,9 +274,9 @@ class ActiveBOTopic(Node):
|
|||||||
np.savetxt(path, data, delimiter=',')
|
np.savetxt(path, data, delimiter=',')
|
||||||
|
|
||||||
rl_msg = ActiveRLRequest()
|
rl_msg = ActiveRLRequest()
|
||||||
rl_msg.interactive_run = 1
|
rl_msg.interactive_run = 2
|
||||||
rl_msg.weights = self.BO.policy_model.random_weights().flatten('F').tolist()
|
rl_msg.weights = self.best_policy[best_policy_idx, :, :].flatten('F').tolist()
|
||||||
rl_msg.policy = self.BO.policy_model.rollout().flatten('F').tolist()
|
rl_msg.weights = self.best_weights[best_policy_idx, :, :].flatten('F').tolist()
|
||||||
rl_msg.nr_weights = self.bo_nr_weights
|
rl_msg.nr_weights = self.bo_nr_weights
|
||||||
rl_msg.nr_steps = self.bo_steps
|
rl_msg.nr_steps = self.bo_steps
|
||||||
rl_msg.nr_dims = self.bo_nr_dims
|
rl_msg.nr_dims = self.bo_nr_dims
|
||||||
@ -275,8 +285,6 @@ class ActiveBOTopic(Node):
|
|||||||
|
|
||||||
self.rl_pending = True
|
self.rl_pending = True
|
||||||
|
|
||||||
self.active_rl_pub.publish(rl_msg)
|
|
||||||
|
|
||||||
self.get_logger().info('Responding: Active BO')
|
self.get_logger().info('Responding: Active BO')
|
||||||
self.active_bo_pub.publish(bo_response)
|
self.active_bo_pub.publish(bo_response)
|
||||||
self.reset_bo_request()
|
self.reset_bo_request()
|
||||||
@ -319,16 +327,12 @@ class ActiveBOTopic(Node):
|
|||||||
|
|
||||||
rl_msg = ActiveRLRequest()
|
rl_msg = ActiveRLRequest()
|
||||||
rl_msg.interactive_run = 0
|
rl_msg.interactive_run = 0
|
||||||
rl_msg.weights = self.BO.policy_model.random_weights().flatten('F').tolist()
|
|
||||||
rl_msg.policy = self.BO.policy_model.rollout().flatten('F').tolist()
|
rl_msg.policy = self.BO.policy_model.rollout().flatten('F').tolist()
|
||||||
|
rl_msg.weights = old_weights.flatten('F').tolist()
|
||||||
rl_msg.nr_weights = self.bo_nr_weights
|
rl_msg.nr_weights = self.bo_nr_weights
|
||||||
rl_msg.nr_steps = self.bo_steps
|
rl_msg.nr_steps = self.bo_steps
|
||||||
rl_msg.nr_dims = self.bo_nr_dims
|
rl_msg.nr_dims = self.bo_nr_dims
|
||||||
|
|
||||||
self.active_rl_pub.publish(rl_msg)
|
|
||||||
|
|
||||||
self.rl_pending = True
|
|
||||||
|
|
||||||
self.active_rl_pub.publish(rl_msg)
|
self.active_rl_pub.publish(rl_msg)
|
||||||
self.rl_pending = True
|
self.rl_pending = True
|
||||||
|
|
||||||
@ -338,8 +342,8 @@ class ActiveBOTopic(Node):
|
|||||||
|
|
||||||
rl_msg = ActiveRLRequest()
|
rl_msg = ActiveRLRequest()
|
||||||
rl_msg.interactive_run = 1
|
rl_msg.interactive_run = 1
|
||||||
rl_msg.weights = self.BO.policy_model.random_weights().flatten('F').tolist()
|
|
||||||
rl_msg.policy = self.BO.policy_model.rollout().flatten('F').tolist()
|
rl_msg.policy = self.BO.policy_model.rollout().flatten('F').tolist()
|
||||||
|
rl_msg.weights = x_next.flatten('F').tolist()
|
||||||
rl_msg.nr_weights = self.bo_nr_weights
|
rl_msg.nr_weights = self.bo_nr_weights
|
||||||
rl_msg.nr_steps = self.bo_steps
|
rl_msg.nr_steps = self.bo_steps
|
||||||
rl_msg.nr_dims = self.bo_nr_dims
|
rl_msg.nr_dims = self.bo_nr_dims
|
||||||
|
Loading…
Reference in New Issue
Block a user