diff --git a/AcquistionFunctions/ExpectedImprovement.py b/AcquistionFunctions/ExpectedImprovement.py
index 6b17a06..4fedb1c 100644
--- a/AcquistionFunctions/ExpectedImprovement.py
+++ b/AcquistionFunctions/ExpectedImprovement.py
@@ -1,6 +1,7 @@
 import numpy as np
 from scipy.stats import norm
 
+
 def ExpectedImprovement(gp, X, nr_test, nr_weights, kappa=2.576, seed=None, lower=-1.0, upper=1.0):
     y_hat = gp.predict(X)
     best_y = max(y_hat)
diff --git a/BayesianOptimization/BOwithTorch.py b/BayesianOptimization/BOwithTorch.py
index abb26e3..f54027e 100644
--- a/BayesianOptimization/BOwithTorch.py
+++ b/BayesianOptimization/BOwithTorch.py
@@ -8,21 +8,27 @@
 from gpytorch.mlls import ExactMarginalLogLikelihood
 from botorch.acquisition import UpperConfidenceBound, ExpectedImprovement, ProbabilityOfImprovement
 
+import warnings
+from botorch.exceptions.warnings import InputDataWarning, BadInitialCandidatesWarning
+
 from PolicyModel.GaussianModel import GaussianPolicy
 from ToyTask.MountainCarGym import Continuous_MountainCarEnv
 
 import matplotlib.pyplot as plt
 
 torch.set_default_dtype(torch.float64)
+warnings.filterwarnings("ignore", category=InputDataWarning)
+warnings.filterwarnings("ignore", category=BadInitialCandidatesWarning)
 
 
 class BayesianOptimization:
-    def __init__(self, env, nr_steps, nr_init=3, acq="Expected Improvement", nr_weights=6, policy_seed=None):
+    def __init__(self, env, nr_steps, nr_init=5, acq="Expected Improvement", nr_weights=6, policy_seed=None):
         self.env = env
         self.nr_init = nr_init
         self.acq = acq
         self.X = None
-        self.Y = None
+        self.X_np = None
+        self.Y_np = None
         self.GP = None
 
         self.episode = 0
@@ -34,7 +40,7 @@ class BayesianOptimization:
         self.nr_steps = nr_steps
         self.policy_seed = policy_seed
 
-        self.lower_bound = -1.0
+        self.lower_bound = 0
         self.upper_bound = 1.0
 
         self.bounds = torch.t(torch.tensor([[self.lower_bound, self.upper_bound]]*self.nr_policy_weights))
@@ -45,7 +51,8 @@ class BayesianOptimization:
                                           self.lower_bound,
                                           self.upper_bound)
 
-        self.eval_X = 512
+        self.eval_X = 200
+        self.eval_restarts = 5
 
     def reset_bo(self):
         self.counter_array = np.empty((1, 1))
@@ -74,90 +81,98 @@ class BayesianOptimization:
             env_reward += distance * self.distance_penalty
 
         self.counter_array = np.vstack((self.counter_array, step_count))
-            self.env.reset()
-            return env_reward, step_count
+        self.env.reset()
+        return env_reward, step_count
 
     def initialize(self):
         self.env.reset()
         self.reset_bo()
         self.X = torch.zeros((self.nr_init, self.nr_policy_weights))
-        self.Y = torch.zeros((self.nr_init, 1))
+        self.X_np = np.zeros((self.nr_init, self.nr_policy_weights))
+        self.Y_np = np.zeros((self.nr_init, 1))
 
         for i in range(self.nr_init):
             self.policy_model.random_policy()
-            self.X[i, :] = torch.tensor(self.policy_model.weights.T)
+            self.X_np[i, :] = self.policy_model.weights.T.clip(min=-1.0, max=1.0)
+            self.X[i, :] = torch.tensor((self.policy_model.weights.T.clip(min=-1.0, max=1.0) + 1)/2)
             policy = self.policy_model.policy_rollout()
             reward, step_count = self.runner(policy)
-            self.Y[i] = reward
+            self.Y_np[i] = reward
 
-        self.GP = SingleTaskGP(train_X=self.X, train_Y=self.Y, covar_module=MaternKernel(nu=1.5))
+        Y = torch.tensor(self.Y_np)
+
+        self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=MaternKernel(nu=1.5))
         mll = ExactMarginalLogLikelihood(self.GP.likelihood, self.GP)
         fit_gpytorch_mll(mll)
 
     def next_observation(self):
         if self.acq == "Expected Improvement":
-            ei = ExpectedImprovement(self.GP, best_f=self.Y.max())
+            ei = ExpectedImprovement(self.GP, best_f=self.best_reward[-1][0], maximize=True)
            x_next, _ = optimize_acqf(ei,
                                       bounds=self.bounds,
-                                      num_restarts=5,
+                                      num_restarts=self.eval_restarts,
                                       raw_samples=self.eval_X,
                                       q=1)
 
         elif self.acq == "Probability of Improvement":
-            poi = ProbabilityOfImprovement(self.GP, best_f=self.Y.max())
+            poi = ProbabilityOfImprovement(self.GP, best_f=self.best_reward[-1][0], maximize=True)
            x_next, _ = optimize_acqf(poi,
                                       bounds=self.bounds,
-                                      num_restarts=5,
+                                      num_restarts=self.eval_restarts,
                                       raw_samples=self.eval_X,
                                       q=1)
 
         elif self.acq == "Upper Confidence Bound":
-            ucb = UpperConfidenceBound(self.GP, beta=2.576)
+            ucb = UpperConfidenceBound(self.GP, beta=2.576, maximize=True)
            x_next, _ = optimize_acqf(ucb,
                                       bounds=self.bounds,
-                                      num_restarts=5,
+                                      num_restarts=self.eval_restarts,
                                       raw_samples=self.eval_X,
                                       q=1)
 
         else:
             raise NotImplementedError
 
-        return x_next
+        return torch.t(x_next)
 
     def eval_new_observation(self, x_next):
-        self.policy_model.weights = x_next.detach().numpy()
+        new_weight = x_next.detach().numpy() * 2 - 1
+        self.policy_model.weights = new_weight
         policy = self.policy_model.policy_rollout()
         reward, step_count = self.runner(policy)
 
-        self.X = torch.vstack((self.X, x_next.reshape(1, -1)))
-        self.Y = torch.vstack((self.Y, torch.tensor(reward).reshape(1, -1)))
+        x_clipped = x_next.clip(min=-1.0, max=1.0)
 
-        self.GP = SingleTaskGP(train_X=self.X, train_Y=self.Y, covar_module=MaternKernel(nu=1.5))
+        self.X_np = np.vstack((self.X_np, new_weight.reshape(1, -1)))
+        self.X = torch.vstack((self.X, x_next.reshape(1, -1)))
+        self.Y_np = np.vstack((self.Y_np, reward))
+
+        Y = torch.tensor(self.Y_np)
+
+        self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=MaternKernel(nu=1.5))
         mll = ExactMarginalLogLikelihood(self.GP.likelihood, self.GP)
         fit_gpytorch_mll(mll)
 
         if self.episode == 0:
-            self.best_reward[0] = torch.max(self.Y, 1).detach().numpy()
-
+            self.best_reward[0] = max(self.Y_np)
         else:
-            self.best_reward = np.vstack((self.best_reward, torch.max(self.Y, 1).detach().numpy()))
+            self.best_reward = np.vstack((self.best_reward, max(self.Y_np)))
 
         self.episode += 1
         return step_count
 
     def add_new_observation(self, reward, x_new):
         self.X = torch.vstack((self.X, torch.tensor(x_new)))
-        self.Y = torch.vstack((self.Y, torch.tensor(reward)))
+        self.Y_np = np.vstack((self.Y_np, reward))
 
         if self.episode == 0:
-            self.best_reward[0] = torch.max(self.Y, 1).detach().numpy()
-
+            self.best_reward[0] = max(self.Y_np)
         else:
-            self.best_reward = np.vstack((self.best_reward, torch.max(self.Y, 1).detach().numpy()))
+            self.best_reward = np.vstack((self.best_reward, max(self.Y_np)))
 
         self.episode += 1
@@ -175,7 +190,7 @@ class BayesianOptimization:
 def main():
     nr_steps = 100
     env = Continuous_MountainCarEnv()  # render_mode='human'
-    bo = BayesianOptimization(env, nr_steps, acq="Expected Improvement")
+    bo = BayesianOptimization(env, nr_steps, nr_weights=5, acq="Expected Improvement")
     bo.initialize()
     iteration_steps = 200
     for i in range(iteration_steps):
@@ -184,6 +199,9 @@ def main():
 
         print(bo.episode, bo.best_reward[-1][0], step_count)
 
+    print(bo.Y_np)
+    print(bo.X_np)
+
 
 if __name__ == "__main__":
     main()
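The functional core of the patch is the change of the search box from [-1, 1] to [0, 1] (`self.lower_bound = 0`): policy weights are clipped to the policy's native [-1, 1] range and rescaled to [0, 1] before being stored as GP training inputs in `initialize()`, and candidates returned by `optimize_acqf` are mapped back to weights via `x * 2 - 1` in `eval_new_observation()`. Below is a minimal sketch of that round trip; the helper names `weights_to_unit` and `unit_to_weights` are illustrative only and do not exist in the patch.

import numpy as np

def weights_to_unit(weights):
    # Clip to the policy's native range [-1, 1], then rescale to [0, 1],
    # mirroring initialize(): (w.clip(-1, 1) + 1) / 2
    return (np.clip(weights, -1.0, 1.0) + 1.0) / 2.0

def unit_to_weights(x):
    # Inverse map used in eval_new_observation(): x * 2 - 1
    return x * 2.0 - 1.0

w = np.array([-1.5, -0.25, 0.0, 0.8])
x = weights_to_unit(w)                     # -> [0.0, 0.375, 0.5, 0.9]
assert np.allclose(unit_to_weights(x), np.clip(w, -1.0, 1.0))

Keeping the GP inputs inside the unit cube follows BoTorch's recommendation that inputs be normalized, which is also the condition checked by the InputDataWarning that the patch filters at import time.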