BOTorch working

Niko Feith 2023-04-24 15:31:27 +02:00
parent 72f3e4e361
commit 4fe3973a53
3 changed files with 22 additions and 14 deletions

File 1 of 3 (filename not shown in this view) — BayesianOptimization script:

@@ -187,6 +187,7 @@ class BayesianOptimization:
     def get_best_result(self, plotter=True):
         y_hat = self.gp.predict(self.X)
         idx = np.argmax(y_hat)
+        print(idx, np.argmax(self.Y))
         x_max = self.X[idx, :]
         self.policy_model.weights = x_max
         self.policy_model.policy_rollout()
@@ -199,7 +200,7 @@ class BayesianOptimization:
 def main():
     nr_steps = 100
     env = Continuous_MountainCarEnv()  # render_mode='human'
-    bo = BayesianOptimization(env, nr_steps, acq='ei')
+    bo = BayesianOptimization(env, nr_steps, nr_weights=10, acq='ei')
     bo.initialize()
     iteration_steps = 200
     for i in range(iteration_steps):
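
Note: the unchanged lines above show the pattern this script uses to report its best result — predict the surrogate's mean at every evaluated point and take the argmax as the best weight vector. A minimal standalone sketch of that pattern, assuming `self.gp` is a scikit-learn `GaussianProcessRegressor` (the diff never shows how this GP is constructed, so that part is an assumption):

    import numpy as np
    from sklearn.gaussian_process import GaussianProcessRegressor

    def best_by_gp_mean(gp: GaussianProcessRegressor, X: np.ndarray) -> np.ndarray:
        # Hypothetical free-standing version of get_best_result();
        # X holds one evaluated weight vector per row.
        y_hat = gp.predict(X)   # posterior mean at the evaluated points
        idx = np.argmax(y_hat)  # index of the highest predicted return
        return X[idx, :]        # weight vector the surrogate rates best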

File 2 of 3 (filename not shown) — BoTorch-based BayesianOptimization script:

@@ -2,7 +2,7 @@ import numpy as np
 import torch
 from botorch.models import SingleTaskGP
 from botorch.optim import optimize_acqf
-from gpytorch.kernels import MaternKernel
+from gpytorch.kernels import MaternKernel, RBFKernel
 from botorch.fit import fit_gpytorch_mll
 from gpytorch.mlls import ExactMarginalLogLikelihood
@@ -52,7 +52,7 @@ class BayesianOptimization:
                                        self.upper_bound)
         self.eval_X = 200
-        self.eval_restarts = 5
+        self.eval_restarts = 10
 
     def reset_bo(self):
         self.counter_array = np.empty((1, 1))
@@ -104,7 +104,7 @@ class BayesianOptimization:
         Y = torch.tensor(self.Y_np)
-        self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=MaternKernel(nu=1.5))
+        self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=RBFKernel())
         mll = ExactMarginalLogLikelihood(self.GP.likelihood, self.GP)
         fit_gpytorch_mll(mll)
@@ -153,7 +153,7 @@ class BayesianOptimization:
         Y = torch.tensor(self.Y_np)
-        self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=MaternKernel(nu=1.5))
+        self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=RBFKernel())
         mll = ExactMarginalLogLikelihood(self.GP.likelihood, self.GP)
         fit_gpytorch_mll(mll)
@@ -177,30 +177,37 @@ class BayesianOptimization:
         self.episode += 1
 
     def get_best_result(self):
+        Y = torch.tensor(self.Y_np)
+        self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=RBFKernel())
+        mll = ExactMarginalLogLikelihood(self.GP.likelihood, self.GP)
+        fit_gpytorch_mll(mll)
+
         y_hat = self.GP.posterior(self.X)
-        idx = torch.argmax(y_hat)
+        idx = torch.argmax(y_hat.mean)
         x_max = self.X[idx, :].detach().numpy()
+        print(idx, np.argmax(self.Y_np))
         self.policy_model.weights = x_max
         best_policy = self.policy_model.policy_rollout().reshape(-1, )
-        return best_policy, y_hat[idx].detach().numpy(), x_max
+        return best_policy, y_hat.mean[idx].detach().numpy(), x_max
 
 def main():
     nr_steps = 100
     env = Continuous_MountainCarEnv()  # render_mode='human'
-    bo = BayesianOptimization(env, nr_steps, nr_weights=5, acq="Expected Improvement")
+    bo = BayesianOptimization(env, nr_steps, nr_weights=10, acq="Expected Improvement")
     bo.initialize()
-    iteration_steps = 200
+    iteration_steps = 500
     for i in range(iteration_steps):
         x_next = bo.next_observation()
         step_count = bo.eval_new_observation(x_next)
-        print(bo.episode, bo.best_reward[-1][0], step_count)
-        print(bo.Y_np)
-        print(bo.X_np)
+        print(bo.episode, bo.best_reward[-1][0], bo.Y_np[-1][0], step_count)
+        _, a, _ = bo.get_best_result()
+        print(a)
 
 if __name__ == "__main__":
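
Note: the get_best_result() fix is the subtle part of this commit — GP.posterior(X) returns a posterior object, not a tensor, so the argmax has to run over its .mean attribute. A self-contained sketch of the fit-then-argmax pattern on toy data (names and shapes here are illustrative, not from the repo):

    import torch
    from botorch.models import SingleTaskGP
    from botorch.fit import fit_gpytorch_mll
    from gpytorch.kernels import RBFKernel
    from gpytorch.mlls import ExactMarginalLogLikelihood

    # Toy training data: 20 five-dimensional weight vectors and their returns.
    X = torch.rand(20, 5, dtype=torch.float64)
    Y = torch.rand(20, 1, dtype=torch.float64)

    gp = SingleTaskGP(train_X=X, train_Y=Y, covar_module=RBFKernel())
    mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
    fit_gpytorch_mll(mll)

    posterior = gp.posterior(X)         # posterior object, not a tensor
    idx = torch.argmax(posterior.mean)  # argmax must go over the mean
    x_max = X[idx, :].numpy()           # weight vector the GP rates best

Taking the argmax of the posterior mean at already-evaluated points (rather than np.argmax over the raw returns) picks the point the smoothed surrogate considers best; the added print compares that index against the raw best observation.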

File 3 of 3 (filename not shown) — Continuous_MountainCarEnv:

@@ -165,8 +165,8 @@ class Continuous_MountainCarEnv(gym.Env):
         reward = 0
         if terminated:
-            reward += 10
+            reward += 50
-        reward -= math.pow(action[0], 2) * 0.1
+        reward -= math.pow(action[0], 2)
         reward -= 1
 
         self.state = np.array([position, velocity], dtype=np.float32)
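
Note: taken together, the new shaping is +50 for reaching the goal, minus the full squared control effort (the 0.1 scaling is dropped), minus a flat 1 per step. As a standalone sketch (a hypothetical helper, not a function in the repo):

    import math

    def step_reward(action: float, terminated: bool) -> float:
        # Per-step reward after this commit, for a scalar action.
        reward = 0.0
        if terminated:
            reward += 50               # success bonus (was +10)
        reward -= math.pow(action, 2)  # quadratic action cost, 0.1 factor removed
        reward -= 1                    # constant per-step penalty
        return reward

The larger terminal bonus and heavier action cost widen the gap between successful and idle policies in the BO objective.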