diff --git a/BayesianOptimization/BOwithGym.py b/BayesianOptimization/BOwithGym.py
index 043ea43..08e3a98 100644
--- a/BayesianOptimization/BOwithGym.py
+++ b/BayesianOptimization/BOwithGym.py
@@ -41,8 +41,15 @@ class BayesianOptimization:
 
         self.nr_test = 100
 
+    def reset_bo(self):
+        self.counter_array = np.zeros((1, 1))
+        self.gp = None
+        self.episode = 0
+        self.best_reward = np.empty((1, 1))
+
     def initialize(self):
         self.env.reset()
+        self.reset_bo()
         if self.env.render_mode == 'human':
             self.env.render()
 
@@ -177,14 +184,17 @@ class BayesianOptimization:
             )
             plt.show()
 
-    def get_best_result(self):
+    def get_best_result(self, plotter=True):
         y_hat = self.gp.predict(self.X)
         idx = np.argmax(y_hat)
         x_max = self.X[idx, :]
         self.policy_model.weights = x_max
         self.policy_model.policy_rollout()
-        print(self.counter_array[idx], idx)
-        self.policy_model.plot_policy(finished=self.counter_array[idx])
+        if plotter:
+            print(self.counter_array[idx], idx)
+            self.policy_model.plot_policy(finished=self.counter_array[idx])
+        else:
+            return self.counter_array[idx]
 
 def main():
     nr_steps = 100
diff --git a/runner/BOGymRunner.py b/runner/BOGymRunner.py
index e69de29..25a43e1 100644
--- a/runner/BOGymRunner.py
+++ b/runner/BOGymRunner.py
@@ -0,0 +1,92 @@
+from BayesianOptimization.BOwithGym import BayesianOptimization
+from ToyTask.MountainCarGym import Continuous_MountainCarEnv
+import numpy as np
+
+import matplotlib.pyplot as plt
+
+# BO parameters
+env = Continuous_MountainCarEnv()
+nr_steps = 100
+acquisition_fun = 'ei'
+iteration_steps = 500
+
+nr_runs = 20
+
+# storage arrays
+finished_store = np.zeros((1, nr_runs))
+best_policy = np.zeros((nr_steps, nr_runs))
+reward_store = np.zeros((iteration_steps, nr_runs))
+
+# post-processing
+def post_processing(finished, policy, reward):
+
+    finish_mean = np.nanmean(finished)
+    finish_std = np.nanstd(finished)
+
+    policy_mean = np.mean(policy, axis=1)
+    policy_std = np.std(policy, axis=1)
+
+    reward_mean = np.mean(reward, axis=1)
+    reward_std = np.std(reward, axis=1)
+
+    return finish_mean, finish_std, policy_mean, policy_std, reward_mean, reward_std
+
+# plot functions
+def plot_policy(mean, std, fin_mean, fin_std):
+    x = np.linspace(0, mean.shape[0], mean.shape[0])
+    plt.plot(x, mean)
+    plt.fill_between(
+        x,
+        mean - 1.96 * std,
+        mean + 1.96 * std,
+        alpha=0.5
+    )
+
+    y = np.linspace(-2, 2, 50)
+    plt.vlines(fin_mean, -2, 2, colors='red')
+    plt.fill_betweenx(
+        y,
+        fin_mean - 1.96 * fin_std,
+        fin_mean + 1.96 * fin_std,
+        alpha=0.5,
+    )
+
+    plt.show()
+
+def plot_reward(mean, std):
+    eps = np.linspace(0, mean.shape[0], mean.shape[0])
+    plt.plot(eps, mean)
+
+    plt.fill_between(
+        eps,
+        mean - 1.96 * std,
+        mean + 1.96 * std,
+        alpha=0.5
+    )
+    plt.show()
+
+# main
+def main():
+    global finished_store, best_policy, reward_store
+    bo = BayesianOptimization(env, nr_steps, acq=acquisition_fun)
+    for i in range(nr_runs):
+        print('Iteration:', str(i))
+        bo.initialize()
+        for j in range(iteration_steps):
+            x_next = bo.next_observation()
+            bo.eval_new_observation(x_next)
+
+        finished = bo.get_best_result(plotter=False)
+
+        finished_store[:, i] = finished
+        best_policy[:, i] = bo.policy_model.trajectory.T
+        reward_store[:, i] = bo.best_reward.T
+
+    finish_mean, finish_std, policy_mean, policy_std, reward_mean, reward_std = post_processing(finished_store,
+                                                                                                best_policy,
+                                                                                                reward_store)
+    plot_policy(policy_mean, policy_std, finish_mean, finish_std)
+    plot_reward(reward_mean, reward_std)
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file