BOTorch working
This commit is contained in:
parent 0cf7850cda
commit 72f3e4e361
@@ -1,6 +1,7 @@
 import numpy as np
 from scipy.stats import norm
 
+
 def ExpectedImprovement(gp, X, nr_test, nr_weights, kappa=2.576, seed=None, lower=-1.0, upper=1.0):
     y_hat = gp.predict(X)
     best_y = max(y_hat)
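
For context on what the truncated function above computes: the closed-form Expected Improvement for a maximization problem is EI(x) = (mu(x) - f* - xi) * Phi(z) + sigma(x) * phi(z) with z = (mu(x) - f* - xi) / sigma(x). The sketch below is a minimal stand-in, not the repository's implementation: it assumes an sklearn-style GaussianProcessRegressor whose predict(X, return_std=True) returns posterior means and standard deviations, and it drops the nr_test, nr_weights, kappa, seed, and bound arguments of the original signature.

import numpy as np
from scipy.stats import norm

def expected_improvement(gp, X_train, X_cand, xi=0.01):
    # Posterior mean/std at candidate points (sklearn-style GP assumed).
    mu, sigma = gp.predict(X_cand, return_std=True)
    best_y = np.max(gp.predict(X_train))  # incumbent value under maximization

    # Closed-form EI: (mu - f* - xi) * Phi(z) + sigma * phi(z), z = (mu - f* - xi) / sigma.
    improvement = mu - best_y - xi
    z = np.divide(improvement, sigma, out=np.zeros_like(sigma), where=sigma > 0)
    ei = improvement * norm.cdf(z) + sigma * norm.pdf(z)
    ei[sigma == 0.0] = 0.0  # no posterior uncertainty, no expected improvement
    return ei
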
@@ -8,21 +8,27 @@ from gpytorch.mlls import ExactMarginalLogLikelihood
 
 from botorch.acquisition import UpperConfidenceBound, ExpectedImprovement, ProbabilityOfImprovement
 
+import warnings
+from botorch.exceptions.warnings import InputDataWarning, BadInitialCandidatesWarning
+
 from PolicyModel.GaussianModel import GaussianPolicy
 from ToyTask.MountainCarGym import Continuous_MountainCarEnv
 
 import matplotlib.pyplot as plt
 
 torch.set_default_dtype(torch.float64)
+warnings.filterwarnings("ignore", category=InputDataWarning)
+warnings.filterwarnings("ignore", category=BadInitialCandidatesWarning)
 
 class BayesianOptimization:
-    def __init__(self, env, nr_steps, nr_init=3, acq="Expected Improvement", nr_weights=6, policy_seed=None):
+    def __init__(self, env, nr_steps, nr_init=5, acq="Expected Improvement", nr_weights=6, policy_seed=None):
         self.env = env
         self.nr_init = nr_init
         self.acq = acq
 
         self.X = None
-        self.Y = None
+        self.X_np = None
+        self.Y_np = None
         self.GP = None
 
         self.episode = 0
@@ -34,7 +40,7 @@ class BayesianOptimization:
         self.nr_steps = nr_steps
         self.policy_seed = policy_seed
 
-        self.lower_bound = -1.0
+        self.lower_bound = 0
         self.upper_bound = 1.0
 
         self.bounds = torch.t(torch.tensor([[self.lower_bound, self.upper_bound]]*self.nr_policy_weights))
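
A note on the bounds construction kept by this hunk: botorch's optimize_acqf expects bounds as a 2 x d tensor (row 0 = lower bounds, row 1 = upper bounds), which is exactly what torch.t(torch.tensor([[low, high]] * d)) produces. A minimal sketch, using nr_weights=5 from main() and the new [0, 1] box as illustrative values:

import torch

nr_policy_weights = 5                # illustrative value, matching nr_weights=5 in main()
lower_bound, upper_bound = 0.0, 1.0

# [[low, high]] * d gives a (d, 2) tensor; torch.t(...) transposes it into the
# (2, d) layout that botorch.optim.optimize_acqf expects.
bounds = torch.t(torch.tensor([[lower_bound, upper_bound]] * nr_policy_weights))
print(bounds.shape)  # torch.Size([2, 5])
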
@@ -45,7 +51,8 @@ class BayesianOptimization:
                                            self.lower_bound,
                                            self.upper_bound)
 
-        self.eval_X = 512
+        self.eval_X = 200
+        self.eval_restarts = 5
 
     def reset_bo(self):
         self.counter_array = np.empty((1, 1))
@@ -82,82 +89,90 @@ class BayesianOptimization:
         self.reset_bo()
 
         self.X = torch.zeros((self.nr_init, self.nr_policy_weights))
-        self.Y = torch.zeros((self.nr_init, 1))
+        self.X_np = np.zeros((self.nr_init, self.nr_policy_weights))
+        self.Y_np = np.zeros((self.nr_init, 1))
 
         for i in range(self.nr_init):
             self.policy_model.random_policy()
-            self.X[i, :] = torch.tensor(self.policy_model.weights.T)
+            self.X_np[i, :] = self.policy_model.weights.T.clip(min=-1.0, max=1.0)
+            self.X[i, :] = torch.tensor((self.policy_model.weights.T.clip(min=-1.0, max=1.0) + 1)/2)
             policy = self.policy_model.policy_rollout()
 
             reward, step_count = self.runner(policy)
 
-            self.Y[i] = reward
+            self.Y_np[i] = reward
 
-        self.GP = SingleTaskGP(train_X=self.X, train_Y=self.Y, covar_module=MaternKernel(nu=1.5))
+        Y = torch.tensor(self.Y_np)
+
+        self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=MaternKernel(nu=1.5))
         mll = ExactMarginalLogLikelihood(self.GP.likelihood, self.GP)
         fit_gpytorch_mll(mll)
 
     def next_observation(self):
         if self.acq == "Expected Improvement":
-            ei = ExpectedImprovement(self.GP, best_f=self.Y.max())
+            ei = ExpectedImprovement(self.GP, best_f=self.best_reward[-1][0], maximize=True)
             x_next, _ = optimize_acqf(ei,
                                       bounds=self.bounds,
-                                      num_restarts=5,
+                                      num_restarts=self.eval_restarts,
                                       raw_samples=self.eval_X,
                                       q=1)
 
         elif self.acq == "Probability of Improvement":
-            poi = ProbabilityOfImprovement(self.GP, best_f=self.Y.max())
+            poi = ProbabilityOfImprovement(self.GP, best_f=self.best_reward[-1][0], maximize=True)
            x_next, _ = optimize_acqf(poi,
                                      bounds=self.bounds,
-                                     num_restarts=5,
+                                     num_restarts=self.eval_restarts,
                                      raw_samples=self.eval_X,
                                      q=1)
 
         elif self.acq == "Upper Confidence Bound":
-            ucb = UpperConfidenceBound(self.GP, beta=2.576)
+            ucb = UpperConfidenceBound(self.GP, beta=2.576, maximize=True)
            x_next, _ = optimize_acqf(ucb,
                                      bounds=self.bounds,
-                                     num_restarts=5,
+                                     num_restarts=self.eval_restarts,
                                      raw_samples=self.eval_X,
                                      q=1)
 
         else:
             raise NotImplementedError
 
-        return x_next
+        return torch.t(x_next)
 
     def eval_new_observation(self, x_next):
-        self.policy_model.weights = x_next.detach().numpy()
+        new_weight = x_next.detach().numpy() * 2 - 1
+        self.policy_model.weights = new_weight
         policy = self.policy_model.policy_rollout()
 
         reward, step_count = self.runner(policy)
 
-        self.X = torch.vstack((self.X, x_next.reshape(1, -1)))
-        self.Y = torch.vstack((self.Y, torch.tensor(reward).reshape(1, -1)))
+        x_clipped = x_next.clip(min=-1.0, max=1.0)
 
-        self.GP = SingleTaskGP(train_X=self.X, train_Y=self.Y, covar_module=MaternKernel(nu=1.5))
+        self.X_np = np.vstack((self.X_np, new_weight.reshape(1, -1)))
+        self.X = torch.vstack((self.X, x_next.reshape(1, -1)))
+        self.Y_np = np.vstack((self.Y_np, reward))
+
+        Y = torch.tensor(self.Y_np)
+
+        self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=MaternKernel(nu=1.5))
         mll = ExactMarginalLogLikelihood(self.GP.likelihood, self.GP)
         fit_gpytorch_mll(mll)
 
         if self.episode == 0:
-            self.best_reward[0] = torch.max(self.Y, 1).detach().numpy()
+            self.best_reward[0] = max(self.Y_np)
 
         else:
-            self.best_reward = np.vstack((self.best_reward, torch.max(self.Y, 1).detach().numpy()))
+            self.best_reward = np.vstack((self.best_reward, max(self.Y_np)))
 
         self.episode += 1
         return step_count
 
     def add_new_observation(self, reward, x_new):
         self.X = torch.vstack((self.X, torch.tensor(x_new)))
-        self.Y = torch.vstack((self.Y, torch.tensor(reward)))
+        self.Y_np = np.vstack((self.Y_np, reward))
 
         if self.episode == 0:
-            self.best_reward[0] = torch.max(self.Y, 1).detach().numpy()
+            self.best_reward[0] = max(self.Y_np)
 
         else:
-            self.best_reward = np.vstack((self.best_reward, torch.max(self.Y, 1).detach().numpy()))
+            self.best_reward = np.vstack((self.best_reward, max(self.Y_np)))
 
         self.episode += 1
 
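
Stripped of the policy and environment plumbing, the BoTorch pattern this class now follows is: keep the training inputs in the unit box, refit a SingleTaskGP from scratch after every observation, and let optimize_acqf maximize the acquisition function over that box. The loop below is a self-contained sketch of that pattern on a toy objective; the objective, dimension, and iteration counts are placeholders rather than the repository's task.

import torch
from botorch.models import SingleTaskGP
from botorch.fit import fit_gpytorch_mll
from botorch.acquisition import ExpectedImprovement
from botorch.optim import optimize_acqf
from gpytorch.kernels import MaternKernel
from gpytorch.mlls import ExactMarginalLogLikelihood

torch.set_default_dtype(torch.float64)

def toy_objective(x01):
    # Placeholder reward: map the [0, 1]^d point to [-1, 1]^d "weights"
    # (mirroring x_next * 2 - 1 in eval_new_observation) and score them.
    w = x01 * 2.0 - 1.0
    return -(w ** 2).sum(dim=-1, keepdim=True)  # maximum at w = 0

d = 5
bounds = torch.stack([torch.zeros(d), torch.ones(d)])  # 2 x d box [0, 1]^d

# Initial design: a few random points, as in initialize().
X = torch.rand(5, d)
Y = toy_objective(X)

for _ in range(10):
    # Refit the GP from scratch on all data, as the class does each episode.
    gp = SingleTaskGP(train_X=X, train_Y=Y, covar_module=MaternKernel(nu=1.5))
    mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
    fit_gpytorch_mll(mll)

    # Maximize Expected Improvement over the box.
    ei = ExpectedImprovement(gp, best_f=Y.max(), maximize=True)
    x_next, _ = optimize_acqf(ei, bounds=bounds, q=1,
                              num_restarts=5, raw_samples=200)

    # Evaluate and append the new observation.
    y_next = toy_objective(x_next)
    X = torch.vstack((X, x_next))
    Y = torch.vstack((Y, y_next))

print("best reward:", Y.max().item())
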
@@ -175,7 +190,7 @@ class BayesianOptimization:
 def main():
     nr_steps = 100
     env = Continuous_MountainCarEnv() # render_mode='human'
-    bo = BayesianOptimization(env, nr_steps, acq="Expected Improvement")
+    bo = BayesianOptimization(env, nr_steps, nr_weights=5, acq="Expected Improvement")
     bo.initialize()
     iteration_steps = 200
     for i in range(iteration_steps):
@@ -184,6 +199,9 @@ def main():
 
         print(bo.episode, bo.best_reward[-1][0], step_count)
 
+    print(bo.Y_np)
+    print(bo.X_np)
+
 
 if __name__ == "__main__":
     main()
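
Finally, the rescaling convention this commit introduces (GP inputs stored in [0, 1] via (clip(w, -1, 1) + 1) / 2, policy weights recovered with x * 2 - 1) is easy to sanity-check in isolation. A small check with arbitrary values:

import numpy as np

rng = np.random.default_rng(0)
w = rng.uniform(-1.5, 1.5, size=5)           # raw policy weights, possibly out of range

w_clipped = w.clip(min=-1.0, max=1.0)        # what X_np stores
x = (w_clipped + 1.0) / 2.0                  # what X (the GP input) stores, in [0, 1]
w_back = x * 2.0 - 1.0                       # eval_new_observation's inverse mapping

assert np.allclose(w_back, w_clipped)        # the two transforms are inverses on [-1, 1]
print(x.min() >= 0.0 and x.max() <= 1.0)     # True: GP inputs stay inside the unit box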