From c3271fe6d264fbdc3e47ff2cc008d88662a5a5b1 Mon Sep 17 00:00:00 2001
From: Niko <nikolaus.feith@unileoben.ac.at>
Date: Mon, 24 Jul 2023 18:47:54 +0200
Subject: [PATCH] Finished with PreferenceExpectedImprovement.py

---
 .../PreferenceExpectedImprovement.py          | 154 ++++++++++++++----
 plotter/reward_plotter.py                     |   4 +-
 2 files changed, 121 insertions(+), 37 deletions(-)

diff --git a/AcquistionFunctions/PreferenceExpectedImprovement.py b/AcquistionFunctions/PreferenceExpectedImprovement.py
index a5a6368..1d2e935 100644
--- a/AcquistionFunctions/PreferenceExpectedImprovement.py
+++ b/AcquistionFunctions/PreferenceExpectedImprovement.py
@@ -1,63 +1,149 @@
 import numpy as np
-from scipy.stats import norm
+from scipy.stats import norm, multivariate_normal
+import matplotlib.pyplot as plt
 
 
 class PreferenceExpectedImprovement:
-    def __init__(self, nr_samples, nr_dims, lower_bound, upper_bound, seed=None):
+    def __init__(self, nr_samples, nr_dims, nr_likelihood_samples, lower_bound, upper_bound, init_var, seed=None):
 
-        self.nr_samples = nr_samples
+        self.nr_samples = int(nr_samples)
         self.nr_dims = nr_dims
-        # check if upper_bound and lower_bound are numpy arrays of shape (nr_dims, 1) or (nr_dims,) or if they are floats
+        self.nr_likelihood_samples = int(nr_likelihood_samples)
 
+        # check if upper_bound and lower_bound are numpy arrays of shape (nr_dims, 1) or (nr_dims,) or if they are float
         self.upper_bound = upper_bound
         self.lower_bound = lower_bound
 
-        self.initial_variance = 5.0
+        self.initial_variance = init_var
 
-        self.user_model = None
-        self.proposal_model_mean = np.array((nr_dims, 1))
-        self.proposal_model_covariance = np.diag(np.ones((nr_dims, )) * self.initial_variance)
+        self.proposal_mean = np.zeros((nr_dims, 1))
+        self.proposal_cov = np.diag(np.ones((nr_dims,)) * self.initial_variance)
 
         self.rng = np.random.default_rng(seed=seed)
 
-    def initialize(self):
-        pass
-
     def rejection_sampling(self):
         samples = np.empty((0, self.nr_dims))
         while samples.shape[0] < self.nr_samples:
             # sample from the multi variate gaussian distribution
-            sample = self.rng.multivariate_normal(
-                self.proposal_model_mean,
-                self.proposal_model_covariance
-            )
+            sample = np.zeros((1, self.nr_dims))
+            for i in range(self.nr_dims):
+                check = False
+                while not check:
+                    sample[0, i] = self.rng.normal(self.proposal_mean[i], self.proposal_cov[i, i])
+                    if self.lower_bound <= sample[0, i] <= self.upper_bound:
+                        check = True
 
-            # check if the sample is within the bounds
-            if np.all(sample >= self.lower_bound) and np.all(sample <= self.upper_bound):
-                samples = np.append(samples, [sample], axis=0)
+            samples = np.append(samples, sample, axis=0)
+            # sample = self.rng.multivariate_normal(
+            #     mean,
+            #     self.proposal_cov
+            # )
+            #
+            # # check if the sample is within the bounds
+            # if np.all(sample >= self.lower_bound) and np.all(sample <= self.upper_bound):
+            #     samples = np.append(samples, [sample], axis=0)
 
         return samples
 
+    def expected_improvement(self, gp, X, kappa=0.01):
+        X_sample = self.rejection_sampling()
 
-    def expected_improvement(self):
-        pass
+        mu_sample, sigma_sample = gp.predict(X_sample, return_std=True)
+        sigma_sample = sigma_sample.reshape(-1, 1)
 
-    def update_user_preference_model(self, preferred_input, preference_array):
-        # Update mean to reflect preferred input
-        self.user_model_mean = preferred_input
+        mu = gp.predict(X)
+        mu_best = np.max(mu)
 
-        initial_variance = np.ones((self.nr_dims, )) * self.initial_variance
-        reduced_variance = initial_variance / 10.0
-        variances = np.where(preference_array, reduced_variance, initial_variance)
-        self.user_model_covariance = np.diag(variances)
+        with np.errstate(divide='warn'):
+            imp = mu_sample - mu_best - kappa
+            imp = imp.reshape(-1, 1)
+            z = imp / sigma_sample
+            ei = imp * norm.cdf(z) + sigma_sample * norm.pdf(z)
+            ei[sigma_sample == 0.0] = 0.0
 
-    def update_proposal_model(self, alpha=0.5):
-        # Update proposal model to be a weighted average of the current proposal model and the user model
-        self.proposal_model_mean = alpha * self.proposal_model_mean + (1 - alpha) * self.user_model_mean
-        self.proposal_model_covariance = alpha * self.proposal_model_covariance + (1 - alpha) * self.user_model_covariance
+        idx = np.argmax(ei)
+        x_next = X_sample[idx, :]
+
+        return x_next
+
+    def likelihood(self, preference):
+        covariance_diag = np.ones((self.nr_dims,)) * self.initial_variance
+        covariance_diag[preference] = 0.1
+
+        covariance = np.diag(covariance_diag)
+
+        return covariance
+
+    def update_proposal_model(self, preference_mean, preference_bool):
+
+        covariance_diag = np.ones((self.nr_dims,)) * self.initial_variance
+        covariance_diag[preference_bool] = 0.1
+
+        preference_cov = np.diag(covariance_diag)
+
+        preference_mean = preference_mean.reshape(-1, 1)
+
+        posterior_mean = np.linalg.inv(np.linalg.inv(self.proposal_cov) + np.linalg.inv(preference_cov)).dot(np.linalg.inv(self.proposal_cov).dot(self.proposal_mean) + np.linalg.inv(preference_cov).dot(preference_mean))
+        posterior_cov = np.linalg.inv(np.linalg.inv(self.proposal_cov) + np.linalg.inv(preference_cov))
+
+        print(posterior_mean, posterior_cov)
+
+        self.proposal_mean = posterior_mean
+        self.proposal_cov = posterior_cov
+
+    def plot_2D(self, mean, cov):
+        print(mean.shape, cov.shape)
+        if mean.shape == (2, 1):
+            mean = mean.squeeze()
+
+        gaussian = multivariate_normal(mean, cov)
+        x = np.random.uniform(self.lower_bound, self.upper_bound, (self.nr_likelihood_samples, self.nr_dims))
+
+        pdf = gaussian.pdf(x)
+        pdf = pdf / pdf.sum()
+
+        plt.scatter(x[:, 0], x[:, 1], c=pdf, cmap='viridis')
+
+        # Add a color bar
+        plt.colorbar(label='PDF value')
+
+        # Add labels
+        plt.xlabel('x1')
+        plt.ylabel('x2')
+
+        # Show the plot
+        plt.show()
 
 
 if __name__ == '__main__':
-    acquisition = PreferenceExpectedImprovement(10, 2, -1.0, 1.0)
-
-
+    acquisition = PreferenceExpectedImprovement(10, 10, 10e4, -1.0, 1.0, 5.0)
+    sample_res = acquisition.rejection_sampling()
+    print(f"finished: {sample_res}")
+    # acquisition = PreferenceExpectedImprovement(10, 2, 10e4, -1.0, 1.0, 10.0)
+    # mean_ = np.array([0.5, 0.23])
+    # preference_ = [False, True]
+    # likelihood_cov = acquisition.likelihood(preference_)
+    #
+    # acquisition.plot_2D(acquisition.proposal_mean, acquisition.proposal_cov)
+    # acquisition.plot_2D(mean_, likelihood_cov)
+    #
+    # acquisition.update_proposal_model(mean_, preference_)
+    # acquisition.plot_2D(acquisition.proposal_mean, acquisition.proposal_cov)
+    #
+    # mean2 = np.array([0.33, 0.24])
+    # preference2 = [True, False]
+    # likelihood_cov2 = acquisition.likelihood(preference2)
+    #
+    # acquisition.plot_2D(mean2, likelihood_cov2)
+    #
+    # acquisition.update_proposal_model(mean2, preference2)
+    # acquisition.plot_2D(acquisition.proposal_mean, acquisition.proposal_cov)
+    #
+    # mean2 = np.array([-0.66, -0.5])
+    # preference2 = [True, True]
+    # likelihood_cov2 = acquisition.likelihood(preference2)
+    #
+    # acquisition.plot_2D(mean2, likelihood_cov2)
+    #
+    # acquisition.update_proposal_model(mean2, preference2)
+    # acquisition.plot_2D(acquisition.proposal_mean, acquisition.proposal_cov)
diff --git a/plotter/reward_plotter.py b/plotter/reward_plotter.py
index 8eafcbb..8809bb3 100644
--- a/plotter/reward_plotter.py
+++ b/plotter/reward_plotter.py
@@ -36,9 +36,7 @@ def plot_csv(paths, x_axis, y_axis):
 
 if __name__ == '__main__':
     filenames = ['cp-ei-bo--20-1687348670_183786.csv',
-                 'cp-ei-random-0_95-20-1687351879_7839339.csv',
-                 'cp-ei-regular-20_0-20-1687340326_8181093.csv',
-                 'cp-ei-improvement-0_2-20-1687348464_2842393.csv',
+                 'cp-pei-regular-20_0-20-1690217019_225133.csv',
                  ]
     home_dir = os.path.expanduser('~')
     file_path = os.path.join(home_dir, 'Documents/IntRLResults/cp-e100r10-bf20')