From 4fe3973a530735b76c1a50a8706b864a12d7d231 Mon Sep 17 00:00:00 2001
From: Niko
Date: Mon, 24 Apr 2023 15:31:27 +0200
Subject: [PATCH] BOTorch working

---
 BayesianOptimization/BOwithGym.py   |  3 ++-
 BayesianOptimization/BOwithTorch.py | 29 ++++++++++++++++++-----------
 ToyTask/MountainCarGym.py           |  4 ++--
 3 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/BayesianOptimization/BOwithGym.py b/BayesianOptimization/BOwithGym.py
index 08e3a98..00c5716 100644
--- a/BayesianOptimization/BOwithGym.py
+++ b/BayesianOptimization/BOwithGym.py
@@ -187,6 +187,7 @@ class BayesianOptimization:
     def get_best_result(self, plotter=True):
         y_hat = self.gp.predict(self.X)
         idx = np.argmax(y_hat)
+        print(idx, np.argmax(self.Y))
         x_max = self.X[idx, :]
         self.policy_model.weights = x_max
         self.policy_model.policy_rollout()
@@ -199,7 +200,7 @@ class BayesianOptimization:
 def main():
     nr_steps = 100
     env = Continuous_MountainCarEnv()  # render_mode='human'
-    bo = BayesianOptimization(env, nr_steps, acq='ei')
+    bo = BayesianOptimization(env, nr_steps, nr_weights=10, acq='ei')
     bo.initialize()
     iteration_steps = 200
     for i in range(iteration_steps):
diff --git a/BayesianOptimization/BOwithTorch.py b/BayesianOptimization/BOwithTorch.py
index f54027e..b61865a 100644
--- a/BayesianOptimization/BOwithTorch.py
+++ b/BayesianOptimization/BOwithTorch.py
@@ -2,7 +2,7 @@ import numpy as np
 import torch
 from botorch.models import SingleTaskGP
 from botorch.optim import optimize_acqf
-from gpytorch.kernels import MaternKernel
+from gpytorch.kernels import MaternKernel, RBFKernel
 from botorch.fit import fit_gpytorch_mll
 from gpytorch.mlls import ExactMarginalLogLikelihood
 
@@ -52,7 +52,7 @@ class BayesianOptimization:
                                            self.upper_bound)
 
         self.eval_X = 200
-        self.eval_restarts = 5
+        self.eval_restarts = 10
 
     def reset_bo(self):
         self.counter_array = np.empty((1, 1))
@@ -104,7 +104,7 @@ class BayesianOptimization:
 
         Y = torch.tensor(self.Y_np)
 
-        self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=MaternKernel(nu=1.5))
+        self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=RBFKernel())
         mll = ExactMarginalLogLikelihood(self.GP.likelihood, self.GP)
         fit_gpytorch_mll(mll)
 
@@ -153,7 +153,7 @@ class BayesianOptimization:
 
         Y = torch.tensor(self.Y_np)
 
-        self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=MaternKernel(nu=1.5))
+        self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=RBFKernel())
         mll = ExactMarginalLogLikelihood(self.GP.likelihood, self.GP)
         fit_gpytorch_mll(mll)
 
@@ -177,30 +177,37 @@ class BayesianOptimization:
         self.episode += 1
 
     def get_best_result(self):
+        Y = torch.tensor(self.Y_np)
+        self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=RBFKernel())
+        mll = ExactMarginalLogLikelihood(self.GP.likelihood, self.GP)
+        fit_gpytorch_mll(mll)
+
         y_hat = self.GP.posterior(self.X)
-        idx = torch.argmax(y_hat)
+        idx = torch.argmax(y_hat.mean)
         x_max = self.X[idx, :].detach().numpy()
 
+        print(idx, np.argmax(self.Y_np))
+
         self.policy_model.weights = x_max
         best_policy = self.policy_model.policy_rollout().reshape(-1, )
 
-        return best_policy, y_hat[idx].detach().numpy(), x_max
+        return best_policy, y_hat.mean[idx].detach().numpy(), x_max
 
 
 def main():
     nr_steps = 100
     env = Continuous_MountainCarEnv()  # render_mode='human'
-    bo = BayesianOptimization(env, nr_steps, nr_weights=5, acq="Expected Improvement")
+    bo = BayesianOptimization(env, nr_steps, nr_weights=10, acq="Expected Improvement")
     bo.initialize()
-    iteration_steps = 200
+    iteration_steps = 500
     for i in range(iteration_steps):
         x_next = bo.next_observation()
         step_count = bo.eval_new_observation(x_next)
 
-        print(bo.episode, bo.best_reward[-1][0], step_count)
+        print(bo.episode, bo.best_reward[-1][0], bo.Y_np[-1][0], step_count)
 
-    print(bo.Y_np)
-    print(bo.X_np)
+    _, a, _ = bo.get_best_result()
+    print(a)
 
 
 if __name__ == "__main__":
diff --git a/ToyTask/MountainCarGym.py b/ToyTask/MountainCarGym.py
index 7da049b..82a9058 100644
--- a/ToyTask/MountainCarGym.py
+++ b/ToyTask/MountainCarGym.py
@@ -165,8 +165,8 @@ class Continuous_MountainCarEnv(gym.Env):
         reward = 0
 
         if terminated:
-            reward += 10
-            reward -= math.pow(action[0], 2) * 0.1
+            reward += 50
+            reward -= math.pow(action[0], 2)
         reward -= 1
 
         self.state = np.array([position, velocity], dtype=np.float32)
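
Editor's note (appended, not part of the patch): the reworked get_best_result() in BOwithTorch.py now refits a SingleTaskGP with a plain RBFKernel through ExactMarginalLogLikelihood and returns the evaluated point with the highest posterior mean, instead of calling torch.argmax on the posterior object itself (which is not a tensor). The sketch below reproduces that pattern in isolation; the tensor shapes and the toy reward function are illustrative assumptions, not values from the project.

    # Minimal sketch of the fit-then-argmax-posterior-mean pattern used in get_best_result().
    # Data here is synthetic and stands in for self.X (weight vectors) and self.Y_np (rewards).
    import torch
    from botorch.models import SingleTaskGP
    from botorch.fit import fit_gpytorch_mll
    from gpytorch.kernels import RBFKernel
    from gpytorch.mlls import ExactMarginalLogLikelihood

    train_X = torch.rand(20, 10, dtype=torch.double)             # 20 evaluated weight vectors (assumed shape)
    train_Y = -((train_X - 0.5) ** 2).sum(dim=1, keepdim=True)   # toy rewards, shape (20, 1)

    # Refit the GP on everything observed so far, as the patched method does.
    gp = SingleTaskGP(train_X=train_X, train_Y=train_Y, covar_module=RBFKernel())
    mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
    fit_gpytorch_mll(mll)

    # Posterior over the evaluated points; the best candidate is the argmax of the mean.
    posterior = gp.posterior(train_X)
    idx = torch.argmax(posterior.mean)
    x_best = train_X[idx].detach().numpy()                       # corresponds to x_max in the patch
    print(idx.item(), posterior.mean[idx].item())

Because the GP is refit right before the read-out, the returned x_max reflects the model's final belief over all observations rather than whatever GP state was left from the last acquisition step.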