BOTorch working

Niko Feith 2023-04-21 12:27:29 +02:00
parent 0cf7850cda
commit 72f3e4e361
2 changed files with 48 additions and 29 deletions

View File

@@ -1,6 +1,7 @@
 import numpy as np
 from scipy.stats import norm
 def ExpectedImprovement(gp, X, nr_test, nr_weights, kappa=2.576, seed=None, lower=-1.0, upper=1.0):
     y_hat = gp.predict(X)
     best_y = max(y_hat)
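For reference, a minimal NumPy/SciPy sketch of the closed-form Expected Improvement that a hand-rolled acquisition function like the one above typically evaluates; the mu/sigma predictive outputs and the xi exploration offset are illustrative assumptions, not this file's exact code:

import numpy as np
from scipy.stats import norm

def expected_improvement(mu, sigma, best_y, xi=0.01):
    # mu, sigma: GP predictive mean and standard deviation at the candidate points
    # best_y: best observed objective value so far (maximization convention)
    sigma = np.maximum(sigma, 1e-12)   # guard against division by zero
    z = (mu - best_y - xi) / sigma
    return (mu - best_y - xi) * norm.cdf(z) + sigma * norm.pdf(z)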

View File

@@ -8,21 +8,27 @@ from gpytorch.mlls import ExactMarginalLogLikelihood
 from botorch.acquisition import UpperConfidenceBound, ExpectedImprovement, ProbabilityOfImprovement
+import warnings
+from botorch.exceptions.warnings import InputDataWarning, BadInitialCandidatesWarning
 from PolicyModel.GaussianModel import GaussianPolicy
 from ToyTask.MountainCarGym import Continuous_MountainCarEnv
 import matplotlib.pyplot as plt
 torch.set_default_dtype(torch.float64)
+warnings.filterwarnings("ignore", category=InputDataWarning)
+warnings.filterwarnings("ignore", category=BadInitialCandidatesWarning)
 class BayesianOptimization:
-    def __init__(self, env, nr_steps, nr_init=3, acq="Expected Improvement", nr_weights=6, policy_seed=None):
+    def __init__(self, env, nr_steps, nr_init=5, acq="Expected Improvement", nr_weights=6, policy_seed=None):
         self.env = env
         self.nr_init = nr_init
         self.acq = acq
         self.X = None
-        self.Y = None
+        self.X_np = None
+        self.Y_np = None
         self.GP = None
         self.episode = 0
@@ -34,7 +40,7 @@ class BayesianOptimization:
         self.nr_steps = nr_steps
         self.policy_seed = policy_seed
-        self.lower_bound = -1.0
+        self.lower_bound = 0
         self.upper_bound = 1.0
         self.bounds = torch.t(torch.tensor([[self.lower_bound, self.upper_bound]]*self.nr_policy_weights))
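The self.bounds construction above produces the 2 x d tensor layout that botorch.optim.optimize_acqf expects (row 0 holds the lower bounds, row 1 the upper bounds). A quick standalone check with illustrative values:

import torch

lower_bound, upper_bound, nr_policy_weights = 0.0, 1.0, 6
bounds = torch.t(torch.tensor([[lower_bound, upper_bound]] * nr_policy_weights))
print(bounds.shape)  # torch.Size([2, 6]); row 0 = lower bounds, row 1 = upper bounds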
@@ -45,7 +51,8 @@ class BayesianOptimization:
                                           self.lower_bound,
                                           self.upper_bound)
-        self.eval_X = 512
+        self.eval_X = 200
+        self.eval_restarts = 5
     def reset_bo(self):
         self.counter_array = np.empty((1, 1))
@@ -82,82 +89,90 @@ class BayesianOptimization:
         self.reset_bo()
         self.X = torch.zeros((self.nr_init, self.nr_policy_weights))
-        self.Y = torch.zeros((self.nr_init, 1))
+        self.X_np = np.zeros((self.nr_init, self.nr_policy_weights))
+        self.Y_np = np.zeros((self.nr_init, 1))
         for i in range(self.nr_init):
             self.policy_model.random_policy()
-            self.X[i, :] = torch.tensor(self.policy_model.weights.T)
+            self.X_np[i, :] = self.policy_model.weights.T.clip(min=-1.0, max=1.0)
+            self.X[i, :] = torch.tensor((self.policy_model.weights.T.clip(min=-1.0, max=1.0) + 1)/2)
             policy = self.policy_model.policy_rollout()
             reward, step_count = self.runner(policy)
-            self.Y[i] = reward
+            self.Y_np[i] = reward
-        self.GP = SingleTaskGP(train_X=self.X, train_Y=self.Y, covar_module=MaternKernel(nu=1.5))
+        Y = torch.tensor(self.Y_np)
+        self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=MaternKernel(nu=1.5))
         mll = ExactMarginalLogLikelihood(self.GP.likelihood, self.GP)
         fit_gpytorch_mll(mll)
     def next_observation(self):
         if self.acq == "Expected Improvement":
-            ei = ExpectedImprovement(self.GP, best_f=self.Y.max())
+            ei = ExpectedImprovement(self.GP, best_f=self.best_reward[-1][0], maximize=True)
             x_next, _ = optimize_acqf(ei,
                                       bounds=self.bounds,
-                                      num_restarts=5,
+                                      num_restarts=self.eval_restarts,
                                       raw_samples=self.eval_X,
                                       q=1)
         elif self.acq == "Probability of Improvement":
-            poi = ProbabilityOfImprovement(self.GP, best_f=self.Y.max())
+            poi = ProbabilityOfImprovement(self.GP, best_f=self.best_reward[-1][0], maximize=True)
             x_next, _ = optimize_acqf(poi,
                                       bounds=self.bounds,
-                                      num_restarts=5,
+                                      num_restarts=self.eval_restarts,
                                       raw_samples=self.eval_X,
                                       q=1)
         elif self.acq == "Upper Confidence Bound":
-            ucb = UpperConfidenceBound(self.GP, beta=2.576)
+            ucb = UpperConfidenceBound(self.GP, beta=2.576, maximize=True)
             x_next, _ = optimize_acqf(ucb,
                                       bounds=self.bounds,
-                                      num_restarts=5,
+                                      num_restarts=self.eval_restarts,
                                       raw_samples=self.eval_X,
                                       q=1)
         else:
             raise NotImplementedError
-        return x_next
+        return torch.t(x_next)
     def eval_new_observation(self, x_next):
-        self.policy_model.weights = x_next.detach().numpy()
+        new_weight = x_next.detach().numpy() * 2 - 1
+        self.policy_model.weights = new_weight
         policy = self.policy_model.policy_rollout()
         reward, step_count = self.runner(policy)
-        self.X = torch.vstack((self.X, x_next.reshape(1, -1)))
-        self.Y = torch.vstack((self.Y, torch.tensor(reward).reshape(1, -1)))
-        self.GP = SingleTaskGP(train_X=self.X, train_Y=self.Y, covar_module=MaternKernel(nu=1.5))
+        x_clipped = x_next.clip(min=-1.0, max=1.0)
+        self.X_np = np.vstack((self.X_np, new_weight.reshape(1, -1)))
+        self.X = torch.vstack((self.X, x_next.reshape(1, -1)))
+        self.Y_np = np.vstack((self.Y_np, reward))
+        Y = torch.tensor(self.Y_np)
+        self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=MaternKernel(nu=1.5))
         mll = ExactMarginalLogLikelihood(self.GP.likelihood, self.GP)
         fit_gpytorch_mll(mll)
         if self.episode == 0:
-            self.best_reward[0] = torch.max(self.Y, 1).detach().numpy()
+            self.best_reward[0] = max(self.Y_np)
         else:
-            self.best_reward = np.vstack((self.best_reward, torch.max(self.Y, 1).detach().numpy()))
+            self.best_reward = np.vstack((self.best_reward, max(self.Y_np)))
         self.episode += 1
         return step_count
     def add_new_observation(self, reward, x_new):
         self.X = torch.vstack((self.X, torch.tensor(x_new)))
-        self.Y = torch.vstack((self.Y, torch.tensor(reward)))
+        self.Y_np = np.vstack((self.Y_np, reward))
         if self.episode == 0:
-            self.best_reward[0] = torch.max(self.Y, 1).detach().numpy()
+            self.best_reward[0] = max(self.Y_np)
         else:
-            self.best_reward = np.vstack((self.best_reward, torch.max(self.Y, 1).detach().numpy()))
+            self.best_reward = np.vstack((self.best_reward, max(self.Y_np)))
         self.episode += 1
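eval_new_observation above keeps the GP's search space on the unit interval and rescales each candidate to the policy's weight range before the rollout. A tiny sketch of that affine map and the inverse used when seeding the GP in initialize (the sample values are illustrative):

import numpy as np

x_unit = np.array([0.0, 0.25, 0.5, 1.0])   # candidate drawn from the [0, 1] search space
weights = x_unit * 2 - 1                   # -> [-1., -0.5, 0., 1.], the policy weight range
x_back = (weights.clip(-1.0, 1.0) + 1) / 2 # inverse map, as used for the initial design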
@@ -175,7 +190,7 @@ class BayesianOptimization:
 def main():
     nr_steps = 100
     env = Continuous_MountainCarEnv()  # render_mode='human'
-    bo = BayesianOptimization(env, nr_steps, acq="Expected Improvement")
+    bo = BayesianOptimization(env, nr_steps, nr_weights=5, acq="Expected Improvement")
     bo.initialize()
     iteration_steps = 200
     for i in range(iteration_steps):
@@ -184,6 +199,9 @@ def main():
         print(bo.episode, bo.best_reward[-1][0], step_count)
+        print(bo.Y_np)
+        print(bo.X_np)
 if __name__ == "__main__":
     main()
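Taken together, the commit moves the optimizer onto the standard BoTorch loop: fit a SingleTaskGP with a Matern kernel, maximize an analytic acquisition function via optimize_acqf, evaluate the candidate, and refit. A self-contained sketch of that pattern on a synthetic objective (the toy objective, dimension, and iteration counts are illustrative, not the repository's task):

import torch
from botorch.models import SingleTaskGP
from botorch.fit import fit_gpytorch_mll
from botorch.acquisition import ExpectedImprovement
from botorch.optim import optimize_acqf
from gpytorch.kernels import MaternKernel
from gpytorch.mlls import ExactMarginalLogLikelihood

torch.set_default_dtype(torch.float64)

def objective(x):
    # Toy stand-in for the policy-rollout reward (maximized at x = 0.3 in every dimension).
    return -((x - 0.3) ** 2).sum(dim=-1, keepdim=True)

d = 6                                                  # number of policy weights
bounds = torch.stack([torch.zeros(d), torch.ones(d)])  # 2 x d, as optimize_acqf expects
train_X = torch.rand(5, d)                             # initial design on the unit cube
train_Y = objective(train_X)

for _ in range(10):
    gp = SingleTaskGP(train_X=train_X, train_Y=train_Y, covar_module=MaternKernel(nu=1.5))
    mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
    fit_gpytorch_mll(mll)
    ei = ExpectedImprovement(gp, best_f=train_Y.max(), maximize=True)
    x_next, _ = optimize_acqf(ei, bounds=bounds, q=1, num_restarts=5, raw_samples=200)
    train_X = torch.cat([train_X, x_next])
    train_Y = torch.cat([train_Y, objective(x_next)])

print(train_Y.max())                                   # best observed value after the loop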