From c3271fe6d264fbdc3e47ff2cc008d88662a5a5b1 Mon Sep 17 00:00:00 2001 From: Niko Date: Mon, 24 Jul 2023 18:47:54 +0200 Subject: [PATCH] Finished with PreferenceExpectedImprovement.py --- .../PreferenceExpectedImprovement.py | 154 ++++++++++++++---- plotter/reward_plotter.py | 4 +- 2 files changed, 121 insertions(+), 37 deletions(-) diff --git a/AcquistionFunctions/PreferenceExpectedImprovement.py b/AcquistionFunctions/PreferenceExpectedImprovement.py index a5a6368..1d2e935 100644 --- a/AcquistionFunctions/PreferenceExpectedImprovement.py +++ b/AcquistionFunctions/PreferenceExpectedImprovement.py @@ -1,63 +1,149 @@ import numpy as np -from scipy.stats import norm +from scipy.stats import norm, multivariate_normal +import matplotlib.pyplot as plt class PreferenceExpectedImprovement: - def __init__(self, nr_samples, nr_dims, lower_bound, upper_bound, seed=None): + def __init__(self, nr_samples, nr_dims, nr_likelihood_samples, lower_bound, upper_bound, init_var, seed=None): - self.nr_samples = nr_samples + self.nr_samples = int(nr_samples) self.nr_dims = nr_dims - # check if upper_bound and lower_bound are numpy arrays of shape (nr_dims, 1) or (nr_dims,) or if they are floats + self.nr_likelihood_samples = int(nr_likelihood_samples) + # check if upper_bound and lower_bound are numpy arrays of shape (nr_dims, 1) or (nr_dims,) or if they are float self.upper_bound = upper_bound self.lower_bound = lower_bound - self.initial_variance = 5.0 + self.initial_variance = init_var - self.user_model = None - self.proposal_model_mean = np.array((nr_dims, 1)) - self.proposal_model_covariance = np.diag(np.ones((nr_dims, )) * self.initial_variance) + self.proposal_mean = np.zeros((nr_dims, 1)) + self.proposal_cov = np.diag(np.ones((nr_dims,)) * self.initial_variance) self.rng = np.random.default_rng(seed=seed) - def initialize(self): - pass - def rejection_sampling(self): samples = np.empty((0, self.nr_dims)) while samples.shape[0] < self.nr_samples: # sample from the multi variate gaussian distribution - sample = self.rng.multivariate_normal( - self.proposal_model_mean, - self.proposal_model_covariance - ) + sample = np.zeros((1, self.nr_dims)) + for i in range(self.nr_dims): + check = False + while not check: + sample[0, i] = self.rng.normal(self.proposal_mean[i], self.proposal_cov[i, i]) + if self.lower_bound <= sample[0, i] <= self.upper_bound: + check = True - # check if the sample is within the bounds - if np.all(sample >= self.lower_bound) and np.all(sample <= self.upper_bound): - samples = np.append(samples, [sample], axis=0) + samples = np.append(samples, sample, axis=0) + # sample = self.rng.multivariate_normal( + # mean, + # self.proposal_cov + # ) + # + # # check if the sample is within the bounds + # if np.all(sample >= self.lower_bound) and np.all(sample <= self.upper_bound): + # samples = np.append(samples, [sample], axis=0) return samples + def expected_improvement(self, gp, X, kappa=0.01): + X_sample = self.rejection_sampling() - def expected_improvement(self): - pass + mu_sample, sigma_sample = gp.predict(X_sample, return_std=True) + sigma_sample = sigma_sample.reshape(-1, 1) - def update_user_preference_model(self, preferred_input, preference_array): - # Update mean to reflect preferred input - self.user_model_mean = preferred_input + mu = gp.predict(X) + mu_best = np.max(mu) - initial_variance = np.ones((self.nr_dims, )) * self.initial_variance - reduced_variance = initial_variance / 10.0 - variances = np.where(preference_array, reduced_variance, initial_variance) - self.user_model_covariance = np.diag(variances) + with np.errstate(divide='warn'): + imp = mu_sample - mu_best - kappa + imp = imp.reshape(-1, 1) + z = imp / sigma_sample + ei = imp * norm.cdf(z) + sigma_sample * norm.pdf(z) + ei[sigma_sample == 0.0] = 0.0 - def update_proposal_model(self, alpha=0.5): - # Update proposal model to be a weighted average of the current proposal model and the user model - self.proposal_model_mean = alpha * self.proposal_model_mean + (1 - alpha) * self.user_model_mean - self.proposal_model_covariance = alpha * self.proposal_model_covariance + (1 - alpha) * self.user_model_covariance + idx = np.argmax(ei) + x_next = X_sample[idx, :] + + return x_next + + def likelihood(self, preference): + covariance_diag = np.ones((self.nr_dims,)) * self.initial_variance + covariance_diag[preference] = 0.1 + + covariance = np.diag(covariance_diag) + + return covariance + + def update_proposal_model(self, preference_mean, preference_bool): + + covariance_diag = np.ones((self.nr_dims,)) * self.initial_variance + covariance_diag[preference_bool] = 0.1 + + preference_cov = np.diag(covariance_diag) + + preference_mean = preference_mean.reshape(-1, 1) + + posterior_mean = np.linalg.inv(np.linalg.inv(self.proposal_cov) + np.linalg.inv(preference_cov)).dot(np.linalg.inv(self.proposal_cov).dot(self.proposal_mean) + np.linalg.inv(preference_cov).dot(preference_mean)) + posterior_cov = np.linalg.inv(np.linalg.inv(self.proposal_cov) + np.linalg.inv(preference_cov)) + + print(posterior_mean, posterior_cov) + + self.proposal_mean = posterior_mean + self.proposal_cov = posterior_cov + + def plot_2D(self, mean, cov): + print(mean.shape, cov.shape) + if mean.shape == (2, 1): + mean = mean.squeeze() + + gaussian = multivariate_normal(mean, cov) + x = np.random.uniform(self.lower_bound, self.upper_bound, (self.nr_likelihood_samples, self.nr_dims)) + + pdf = gaussian.pdf(x) + pdf = pdf / pdf.sum() + + plt.scatter(x[:, 0], x[:, 1], c=pdf, cmap='viridis') + + # Add a color bar + plt.colorbar(label='PDF value') + + # Add labels + plt.xlabel('x1') + plt.ylabel('x2') + + # Show the plot + plt.show() if __name__ == '__main__': - acquisition = PreferenceExpectedImprovement(10, 2, -1.0, 1.0) - - + acquisition = PreferenceExpectedImprovement(10, 10, 10e4, -1.0, 1.0, 5.0) + sample_res = acquisition.rejection_sampling() + print(f"finished: {sample_res}") + # acquisition = PreferenceExpectedImprovement(10, 2, 10e4, -1.0, 1.0, 10.0) + # mean_ = np.array([0.5, 0.23]) + # preference_ = [False, True] + # likelihood_cov = acquisition.likelihood(preference_) + # + # acquisition.plot_2D(acquisition.proposal_mean, acquisition.proposal_cov) + # acquisition.plot_2D(mean_, likelihood_cov) + # + # acquisition.update_proposal_model(mean_, preference_) + # acquisition.plot_2D(acquisition.proposal_mean, acquisition.proposal_cov) + # + # mean2 = np.array([0.33, 0.24]) + # preference2 = [True, False] + # likelihood_cov2 = acquisition.likelihood(preference2) + # + # acquisition.plot_2D(mean2, likelihood_cov2) + # + # acquisition.update_proposal_model(mean2, preference2) + # acquisition.plot_2D(acquisition.proposal_mean, acquisition.proposal_cov) + # + # mean2 = np.array([-0.66, -0.5]) + # preference2 = [True, True] + # likelihood_cov2 = acquisition.likelihood(preference2) + # + # acquisition.plot_2D(mean2, likelihood_cov2) + # + # acquisition.update_proposal_model(mean2, preference2) + # acquisition.plot_2D(acquisition.proposal_mean, acquisition.proposal_cov) diff --git a/plotter/reward_plotter.py b/plotter/reward_plotter.py index 8eafcbb..8809bb3 100644 --- a/plotter/reward_plotter.py +++ b/plotter/reward_plotter.py @@ -36,9 +36,7 @@ def plot_csv(paths, x_axis, y_axis): if __name__ == '__main__': filenames = ['cp-ei-bo--20-1687348670_183786.csv', - 'cp-ei-random-0_95-20-1687351879_7839339.csv', - 'cp-ei-regular-20_0-20-1687340326_8181093.csv', - 'cp-ei-improvement-0_2-20-1687348464_2842393.csv', + 'cp-pei-regular-20_0-20-1690217019_225133.csv', ] home_dir = os.path.expanduser('~') file_path = os.path.join(home_dir, 'Documents/IntRLResults/cp-e100r10-bf20')