Franka experiment complete + debugged
parent 54c196715e
commit 739bc5e160
@@ -3,34 +3,47 @@ from scipy.stats import norm, multivariate_normal


 class PreferenceExpectedImprovement:

-    def __init__(self, nr_dims, nr_samples, lower_bound, upper_bound, initial_variance, update_variance, seed=None):
+    def __init__(self, nr_dims, nr_samples, initial_variance, update_variance, lower_bound=None, upper_bound=None, seed=None,
+                 fixed_dims=None):

         self.nr_dims = nr_dims
         self.nr_samples = int(nr_samples)

-        self.lower_bound = lower_bound
-        self.upper_bound = upper_bound
-
         self.init_var = initial_variance
         self.update_var = update_variance

+        if lower_bound is None:
+            self.lower_bound = [-1.] * nr_dims
+        else:
+            self.lower_bound = lower_bound
+
+        if upper_bound is None:
+            self.upper_bound = [1.] * nr_dims
+        else:
+            self.upper_bound = upper_bound
+
         self.rng = np.random.default_rng(seed=seed)

         # initial proposal distribution
         self.proposal_mean = np.zeros((nr_dims, 1))
         self.proposal_cov = np.diag(np.ones((nr_dims,)) * self.init_var)

+        # fixed dimension for robot experiment
+        self.fixed_dims = fixed_dims
+
     def rejection_sampling(self):
         samples = np.empty((0, self.nr_dims))
         while samples.shape[0] < self.nr_samples:
             # sample from the multivariate Gaussian distribution
             sample = np.zeros((1, self.nr_dims))
             for i in range(self.nr_dims):
-                check = False
-                while not check:
-                    sample[0, i] = self.rng.normal(self.proposal_mean[i], self.proposal_cov[i, i])
-                    if self.lower_bound <= sample[0, i] <= self.upper_bound:
-                        check = True
+                if i in self.fixed_dims:
+                    sample[0, i] = self.fixed_dims[i]
+                else:
+                    check = False
+                    while not check:
+                        sample[0, i] = self.rng.normal(self.proposal_mean[i], self.proposal_cov[i, i])
+                        if self.lower_bound[i] <= sample[0, i] <= self.upper_bound[i]:
+                            check = True

             samples = np.append(samples, sample, axis=0)
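Note on the new sampler: each dimension listed in fixed_dims is pinned to its given value, and only the free dimensions are rejection-sampled inside the per-dimension bounds. A minimal standalone sketch of the same loop (illustrative names; it adds a `fixed_dims or {}` guard, since the constructor default `fixed_dims=None` would make `i in self.fixed_dims` raise a TypeError, and it takes the square root of the variance because `Generator.normal` expects a standard deviation, while the diff passes the covariance diagonal directly):

    import numpy as np

    def sample_with_fixed_dims(mean, var, lower, upper, fixed_dims=None, seed=None):
        # Pinned indices are copied from fixed_dims; free ones are redrawn
        # from independent Gaussians until they fall inside the bounds.
        fixed_dims = fixed_dims or {}
        rng = np.random.default_rng(seed)
        sample = np.zeros(len(mean))
        for i in range(len(mean)):
            if i in fixed_dims:
                sample[i] = fixed_dims[i]
            else:
                while True:
                    sample[i] = rng.normal(mean[i], np.sqrt(var[i]))
                    if lower[i] <= sample[i] <= upper[i]:
                        break
        return sample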
@@ -16,7 +16,8 @@ warnings.filterwarnings('ignore', category=ConvergenceWarning)


 class BayesianOptimization:
-    def __init__(self, nr_steps, nr_dims, nr_weights, acq='ei', seed=None):
+    def __init__(self, nr_steps, nr_dims, nr_weights, acq='ei', seed=None,
+                 fixed_dims=None, lower_bound=None, upper_bound=None):
         self.acq = acq
         self.episode = 0

@@ -25,16 +26,34 @@ class BayesianOptimization:
         self.nr_weights = nr_weights
         self.weights = self.nr_weights * self.nr_dims

-        self.lower_bound = -1.0
-        self.upper_bound = 1.0
+        if lower_bound is None:
+            self.lower_bound = [-1.]*nr_dims
+            self.lower_bound_1D = [-1.] * self.weights
+        else:
+            self.lower_bound = lower_bound
+            self.lower_bound_1D = [x for x in lower_bound for _ in range(self.nr_weights)]
+
+        if upper_bound is None:
+            self.upper_bound = [1.]*nr_dims
+            self.upper_bound_1D = [1.] * self.weights
+        else:
+            self.upper_bound = upper_bound
+            self.upper_bound_1D = [x for x in upper_bound for _ in range(self.nr_weights)]

         self.seed = seed

         self.X = None
         self.Y = None
         self.gp = None

+        self.fixed_dims = fixed_dims
+
+        if fixed_dims is not None:
+            self.fixed_dims_1D = self.convert_fixed_dims(fixed_dims, nr_weights)
+
         self.policy_model = GaussianRBF(self.nr_steps, self.nr_weights, self.nr_dims,
-                                        lowerb=self.lower_bound, upperb=self.upper_bound, seed=seed)
+                                        lower_bound=self.lower_bound, upper_bound=self.upper_bound, seed=seed,
+                                        fixed_dims=fixed_dims)

         self.acq_sample_size = 100
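The new `*_1D` bounds repeat each per-dimension bound once per weight, matching the column-major (`order='F'`) flattening used for the weight matrix elsewhere in the class: all weights of dimension 0 come first, then all weights of dimension 1. A quick check with the Franka bounds from further down (nr_weights of 3 is an illustrative value):

    nr_weights = 3
    lower_bound = [0.3, -0.35]
    lower_bound_1D = [x for x in lower_bound for _ in range(nr_weights)]
    print(lower_bound_1D)  # [0.3, 0.3, 0.3, -0.35, -0.35, -0.35]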
@@ -43,16 +62,26 @@ class BayesianOptimization:
         if acq == "Preference Expected Improvement":
             self.acq_fun = PreferenceExpectedImprovement(self.weights,
                                                          self.acq_sample_size,
-                                                         self.lower_bound,
-                                                         self.upper_bound,
                                                          initial_variance=10.0,
                                                          update_variance=0.05,
-                                                         seed=seed)
+                                                         seed=seed,
+                                                         fixed_dims=self.fixed_dims_1D,
+                                                         lower_bound=self.lower_bound_1D,
+                                                         upper_bound=self.upper_bound_1D,)

         self.reset_bo()

+    @staticmethod
+    def convert_fixed_dims(fixed_dims, nr_weights):
+        new_fixed_dims = {}
+        for dim, values in fixed_dims.items():
+            for idx, value in enumerate(values):
+                new_dim = dim + idx * nr_weights
+                new_fixed_dims[new_dim] = value
+        return new_fixed_dims
+
     def reset_bo(self):
-        self.gp = GaussianProcessRegressor(Matern(nu=1.5, ), n_restarts_optimizer=5) #length_scale=(1e-8, 1e5)
+        self.gp = GaussianProcessRegressor(Matern(nu=1.5, ), n_restarts_optimizer=5)  # length_scale=(1e-8, 1e5)
         self.best_reward = np.empty((1, 1))
         self.X = np.zeros((1, self.weights), dtype=np.float64)
         self.Y = np.zeros((1, 1), dtype=np.float64)
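`convert_fixed_dims` maps a per-weight entry `{weight_index: [value_dim0, value_dim1, ...]}` onto indices of the flattened, Fortran-ordered weight vector: list entry `idx` lands at `weight_index + idx * nr_weights`. Worked through with the Franka configuration below (nr_weights of 5 is an illustrative value):

    fixed_dims = {0: [0.5, -0.3]}
    nr_weights = 5

    new_fixed_dims = {}
    for dim, values in fixed_dims.items():
        for idx, value in enumerate(values):
            new_fixed_dims[dim + idx * nr_weights] = value

    print(new_fixed_dims)  # {0: 0.5, 5: -0.3}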
@@ -117,4 +146,3 @@ class BayesianOptimization:
         x_max = self.X[idx, :].reshape((self.nr_weights, self.nr_dims), order='F')

         return y_max, x_max, idx
-
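The reshape with `order='F'` inverts the column-major flattening used when weight rows are stored in `self.X`, so a (nr_weights, nr_dims) matrix survives the round trip unchanged. A minimal check:

    import numpy as np

    w = np.arange(6).reshape((3, 2))        # (nr_weights=3, nr_dims=2)
    flat = w.flatten('F')                   # column-major: [0, 2, 4, 1, 3, 5]
    back = flat.reshape((3, 2), order='F')  # recovers the original matrix
    assert (w == back).all()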
@@ -3,7 +3,7 @@ import numpy as np


 class GaussianRBF:
-    def __init__(self, nr_steps, nr_weights, nr_dims, lowerb=-1.0, upperb=1.0, seed=None):
+    def __init__(self, nr_steps, nr_weights, nr_dims, lower_bound=None, upper_bound=None, seed=None, fixed_dims=None):
         self.nr_weights = nr_weights
         self.nr_steps = nr_steps
         self.nr_dims = nr_dims

@@ -11,11 +11,20 @@ class GaussianRBF:
         self.weights = None
         self.trajectory = None

-        self.lowerb = lowerb
-        self.upperb = upperb
+        if lower_bound is None:
+            self.lower_bound = [-1.]*nr_dims
+        else:
+            self.lower_bound = lower_bound
+
+        if upper_bound is None:
+            self.upper_bound = [1.]*nr_dims
+        else:
+            self.upper_bound = upper_bound

         self.rng = np.random.default_rng(seed=seed)

+        self.fixed_dims = fixed_dims
+
         # initialize
         self.mid_points = np.linspace(0, self.nr_steps, self.nr_weights)
         if nr_weights > 1:

@@ -30,8 +39,13 @@ class GaussianRBF:
         self.trajectory = np.zeros((self.nr_steps, self.nr_dims))

     def random_weights(self):
-        for dim in range(self.nr_dims):
-            self.weights[:, dim] = self.rng.uniform(self.lowerb, self.upperb, self.nr_weights)
+        for w in range(self.nr_weights):
+            # if the weight index exists in fixed_dims, set its row directly
+            if w in self.fixed_dims:
+                self.weights[w, :] = np.array(self.fixed_dims[w])
+            else:
+                for dim in range(self.nr_dims):
+                    self.weights[w, dim] = self.rng.uniform(self.lower_bound[dim], self.upper_bound[dim], 1)
         return self.weights

     def rollout(self):
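`random_weights` now works row-wise on the (nr_weights, nr_dims) matrix: a weight index present in `fixed_dims` gets its row copied verbatim, every other entry is drawn uniformly inside the per-dimension bounds. As in the sampler above, `if w in self.fixed_dims` assumes `fixed_dims` was actually passed; with the default `fixed_dims=None` the membership test raises a TypeError. A guarded sketch of the same loop (illustrative names):

    import numpy as np

    def random_weights(weights, lower, upper, fixed_dims=None, rng=None):
        # Rows named in fixed_dims are pinned; the rest are sampled uniformly.
        rng = rng or np.random.default_rng()
        fixed_dims = fixed_dims or {}
        nr_weights, nr_dims = weights.shape
        for w in range(nr_weights):
            if w in fixed_dims:
                weights[w, :] = np.array(fixed_dims[w])
            else:
                for dim in range(nr_dims):
                    weights[w, dim] = rng.uniform(lower[dim], upper[dim])
        return weights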
@@ -101,6 +101,11 @@ class ActiveBOTopic(Node):
                                                  self.mainloop_callback,
                                                  callback_group=mainloop_callback_group)

+        # Franka Experiment
+        self.fixed_dimension = {0: [0.5, -0.3]}
+        self.lower_bound = [0.3, -0.35]
+        self.upper_bound = [0.7, 0.35]
+
     def reset_bo_request(self):
         self.bo_env = None
         self.bo_metric = None
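The hard-coded Franka block pins weight 0 of the policy to (0.5, -0.3), presumably the fixed start of the trajectory, and restricts all remaining weights to the reachable workspace. These values flow into the optimizer through the constructor call added further down, roughly as in this usage sketch (variable names are illustrative):

    fixed_dimension = {0: [0.5, -0.3]}   # weight 0 fixed: dim 0 = 0.5, dim 1 = -0.3
    lower_bound = [0.3, -0.35]           # per-dimension workspace minimum
    upper_bound = [0.7, 0.35]            # per-dimension workspace maximum

    BO = BayesianOptimization(nr_steps, nr_dims, nr_weights,
                              acq="Preference Expected Improvement",
                              fixed_dims=fixed_dimension,
                              lower_bound=lower_bound,
                              upper_bound=upper_bound)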
@@ -199,7 +204,10 @@ class ActiveBOTopic(Node):
             self.BO = BayesianOptimization(self.bo_steps,
                                            self.bo_nr_dims,
                                            self.bo_nr_weights,
-                                           acq=self.bo_acq_fcn)
+                                           acq=self.bo_acq_fcn,
+                                           fixed_dims=self.fixed_dimension,
+                                           lower_bound=self.lower_bound,
+                                           upper_bound=self.upper_bound)

             self.BO.reset_bo()

@@ -209,7 +217,7 @@ class ActiveBOTopic(Node):
         if self.init_pending:

             rl_msg = ActiveRLRequest()
-            rl_msg.interactive_run = 2
+            rl_msg.interactive_run = 1
             rl_msg.weights = self.BO.policy_model.random_weights().flatten('F').tolist()
             rl_msg.policy = self.BO.policy_model.rollout().flatten('F').tolist()
             rl_msg.nr_weights = self.bo_nr_weights

@@ -237,6 +245,8 @@ class ActiveBOTopic(Node):
                 env = 're'
             elif self.bo_env == "Finger":
                 env = 'fin'
+            elif self.bo_env == "Franka":
+                env = 'franka'
             else:
                 raise NotImplementedError
@@ -264,9 +274,9 @@ class ActiveBOTopic(Node):
                 np.savetxt(path, data, delimiter=',')

             rl_msg = ActiveRLRequest()
-            rl_msg.interactive_run = 1
-            rl_msg.weights = self.BO.policy_model.random_weights().flatten('F').tolist()
-            rl_msg.policy = self.BO.policy_model.rollout().flatten('F').tolist()
+            rl_msg.interactive_run = 2
+            rl_msg.weights = self.best_policy[best_policy_idx, :, :].flatten('F').tolist()
+            rl_msg.weights = self.best_weights[best_policy_idx, :, :].flatten('F').tolist()
             rl_msg.nr_weights = self.bo_nr_weights
             rl_msg.nr_steps = self.bo_steps
             rl_msg.nr_dims = self.bo_nr_dims

@@ -275,8 +285,6 @@ class ActiveBOTopic(Node):

             self.rl_pending = True

-            self.active_rl_pub.publish(rl_msg)
-
         self.get_logger().info('Responding: Active BO')
         self.active_bo_pub.publish(bo_response)
         self.reset_bo_request()

@@ -319,16 +327,12 @@ class ActiveBOTopic(Node):

             rl_msg = ActiveRLRequest()
             rl_msg.interactive_run = 0
-            rl_msg.weights = self.BO.policy_model.random_weights().flatten('F').tolist()
-            rl_msg.policy = self.BO.policy_model.rollout().flatten('F').tolist()
+            rl_msg.weights = old_weights.flatten('F').tolist()
             rl_msg.nr_weights = self.bo_nr_weights
             rl_msg.nr_steps = self.bo_steps
             rl_msg.nr_dims = self.bo_nr_dims

-            self.active_rl_pub.publish(rl_msg)
-
-            self.rl_pending = True
-
             self.active_rl_pub.publish(rl_msg)
             self.rl_pending = True

@@ -338,8 +342,8 @@ class ActiveBOTopic(Node):

             rl_msg = ActiveRLRequest()
             rl_msg.interactive_run = 1
-            rl_msg.weights = self.BO.policy_model.random_weights().flatten('F').tolist()
-            rl_msg.policy = self.BO.policy_model.rollout().flatten('F').tolist()
+            rl_msg.weights = x_next.flatten('F').tolist()
             rl_msg.nr_weights = self.bo_nr_weights
             rl_msg.nr_steps = self.bo_steps
             rl_msg.nr_dims = self.bo_nr_dims