From 4fe3973a530735b76c1a50a8706b864a12d7d231 Mon Sep 17 00:00:00 2001
From: Niko
Date: Mon, 24 Apr 2023 15:31:27 +0200
Subject: [PATCH] BOTorch working

---
 BayesianOptimization/BOwithGym.py   |  3 ++-
 BayesianOptimization/BOwithTorch.py | 29 ++++++++++++++++++-----------
 ToyTask/MountainCarGym.py           |  4 ++--
 3 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/BayesianOptimization/BOwithGym.py b/BayesianOptimization/BOwithGym.py
index 08e3a98..00c5716 100644
--- a/BayesianOptimization/BOwithGym.py
+++ b/BayesianOptimization/BOwithGym.py
@@ -187,6 +187,7 @@ class BayesianOptimization:
     def get_best_result(self, plotter=True):
         y_hat = self.gp.predict(self.X)
         idx = np.argmax(y_hat)
+        print(idx, np.argmax(self.Y))
         x_max = self.X[idx, :]
         self.policy_model.weights = x_max
         self.policy_model.policy_rollout()
@@ -199,7 +200,7 @@ class BayesianOptimization:
 def main():
     nr_steps = 100
     env = Continuous_MountainCarEnv()  # render_mode='human'
-    bo = BayesianOptimization(env, nr_steps, acq='ei')
+    bo = BayesianOptimization(env, nr_steps, nr_weights=10, acq='ei')
     bo.initialize()
     iteration_steps = 200
     for i in range(iteration_steps):
diff --git a/BayesianOptimization/BOwithTorch.py b/BayesianOptimization/BOwithTorch.py
index f54027e..b61865a 100644
--- a/BayesianOptimization/BOwithTorch.py
+++ b/BayesianOptimization/BOwithTorch.py
@@ -2,7 +2,7 @@ import numpy as np
 import torch
 from botorch.models import SingleTaskGP
 from botorch.optim import optimize_acqf
-from gpytorch.kernels import MaternKernel
+from gpytorch.kernels import MaternKernel, RBFKernel
 from botorch.fit import fit_gpytorch_mll
 from gpytorch.mlls import ExactMarginalLogLikelihood
 
@@ -52,7 +52,7 @@ class BayesianOptimization:
                                            self.upper_bound)
 
         self.eval_X = 200
-        self.eval_restarts = 5
+        self.eval_restarts = 10
 
     def reset_bo(self):
         self.counter_array = np.empty((1, 1))
@@ -104,7 +104,7 @@ class BayesianOptimization:
 
         Y = torch.tensor(self.Y_np)
 
-        self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=MaternKernel(nu=1.5))
+        self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=RBFKernel())
         mll = ExactMarginalLogLikelihood(self.GP.likelihood, self.GP)
         fit_gpytorch_mll(mll)
 
@@ -153,7 +153,7 @@ class BayesianOptimization:
 
         Y = torch.tensor(self.Y_np)
 
-        self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=MaternKernel(nu=1.5))
+        self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=RBFKernel())
         mll = ExactMarginalLogLikelihood(self.GP.likelihood, self.GP)
         fit_gpytorch_mll(mll)
 
@@ -177,30 +177,37 @@ class BayesianOptimization:
         self.episode += 1
 
     def get_best_result(self):
+        Y = torch.tensor(self.Y_np)
+        self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=RBFKernel())
+        mll = ExactMarginalLogLikelihood(self.GP.likelihood, self.GP)
+        fit_gpytorch_mll(mll)
+
         y_hat = self.GP.posterior(self.X)
-        idx = torch.argmax(y_hat)
+        idx = torch.argmax(y_hat.mean)
         x_max = self.X[idx, :].detach().numpy()
 
+        print(idx, np.argmax(self.Y_np))
+
         self.policy_model.weights = x_max
         best_policy = self.policy_model.policy_rollout().reshape(-1, )
 
-        return best_policy, y_hat[idx].detach().numpy(), x_max
+        return best_policy, y_hat.mean[idx].detach().numpy(), x_max
 
 
 def main():
     nr_steps = 100
     env = Continuous_MountainCarEnv()  # render_mode='human'
-    bo = BayesianOptimization(env, nr_steps, nr_weights=5, acq="Expected Improvement")
+    bo = BayesianOptimization(env, nr_steps, nr_weights=10, acq="Expected Improvement")
     bo.initialize()
-    iteration_steps = 200
+    iteration_steps = 500
     for i in range(iteration_steps):
         x_next = bo.next_observation()
         step_count = bo.eval_new_observation(x_next)
 
-        print(bo.episode, bo.best_reward[-1][0], step_count)
+        print(bo.episode, bo.best_reward[-1][0], bo.Y_np[-1][0], step_count)
 
-    print(bo.Y_np)
-    print(bo.X_np)
+    _, a, _ = bo.get_best_result()
+    print(a)
 
 
 if __name__ == "__main__":
diff --git a/ToyTask/MountainCarGym.py b/ToyTask/MountainCarGym.py
index 7da049b..82a9058 100644
--- a/ToyTask/MountainCarGym.py
+++ b/ToyTask/MountainCarGym.py
@@ -165,8 +165,8 @@ class Continuous_MountainCarEnv(gym.Env):
         reward = 0
 
         if terminated:
-            reward += 10
-            reward -= math.pow(action[0], 2) * 0.1
+            reward += 50
+            reward -= math.pow(action[0], 2)
         reward -= 1
 
         self.state = np.array([position, velocity], dtype=np.float32)
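
Editor's note (appended, not part of the patch): the reworked get_best_result() in BOwithTorch.py now refits a SingleTaskGP with a plain RBFKernel through ExactMarginalLogLikelihood and returns the evaluated point with the highest posterior mean, instead of calling torch.argmax on the posterior object itself (which is not a tensor). The sketch below reproduces that pattern in isolation; the tensor shapes and the toy reward function are illustrative assumptions, not values from the project.

    # Minimal sketch of the fit-then-argmax-posterior-mean pattern used in get_best_result().
    # Data here is synthetic and stands in for self.X (weight vectors) and self.Y_np (rewards).
    import torch
    from botorch.models import SingleTaskGP
    from botorch.fit import fit_gpytorch_mll
    from gpytorch.kernels import RBFKernel
    from gpytorch.mlls import ExactMarginalLogLikelihood

    train_X = torch.rand(20, 10, dtype=torch.double)             # 20 evaluated weight vectors (assumed shape)
    train_Y = -((train_X - 0.5) ** 2).sum(dim=1, keepdim=True)   # toy rewards, shape (20, 1)

    # Refit the GP on everything observed so far, as the patched method does.
    gp = SingleTaskGP(train_X=train_X, train_Y=train_Y, covar_module=RBFKernel())
    mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
    fit_gpytorch_mll(mll)

    # Posterior over the evaluated points; the best candidate is the argmax of the mean.
    posterior = gp.posterior(train_X)
    idx = torch.argmax(posterior.mean)
    x_best = train_X[idx].detach().numpy()                       # corresponds to x_max in the patch
    print(idx.item(), posterior.mean[idx].item())

Because the GP is refit right before the read-out, the returned x_max reflects the model's final belief over all observations rather than whatever GP state was left from the last acquisition step.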