From 886514f9e68a819391aa163eb11b4f19d9a6ff12 Mon Sep 17 00:00:00 2001 From: Niko Date: Thu, 17 Aug 2023 16:14:58 +0200 Subject: [PATCH] adding reacher as env --- .../PreferenceExpectedImprovement.py | 70 ++++++++++--------- plotter/reward_plotter.py | 8 ++- runner/BOReacher.py | 67 ++++++++++++++++++ 3 files changed, 109 insertions(+), 36 deletions(-) create mode 100644 runner/BOReacher.py diff --git a/AcquistionFunctions/PreferenceExpectedImprovement.py b/AcquistionFunctions/PreferenceExpectedImprovement.py index 1d2e935..cc4a5e9 100644 --- a/AcquistionFunctions/PreferenceExpectedImprovement.py +++ b/AcquistionFunctions/PreferenceExpectedImprovement.py @@ -68,7 +68,7 @@ class PreferenceExpectedImprovement: def likelihood(self, preference): covariance_diag = np.ones((self.nr_dims,)) * self.initial_variance - covariance_diag[preference] = 0.1 + covariance_diag[preference] = 0.05 covariance = np.diag(covariance_diag) @@ -77,7 +77,7 @@ class PreferenceExpectedImprovement: def update_proposal_model(self, preference_mean, preference_bool): covariance_diag = np.ones((self.nr_dims,)) * self.initial_variance - covariance_diag[preference_bool] = 0.1 + covariance_diag[preference_bool] = 0.05 preference_cov = np.diag(covariance_diag) @@ -116,34 +116,38 @@ class PreferenceExpectedImprovement: if __name__ == '__main__': - acquisition = PreferenceExpectedImprovement(10, 10, 10e4, -1.0, 1.0, 5.0) - sample_res = acquisition.rejection_sampling() - print(f"finished: {sample_res}") - # acquisition = PreferenceExpectedImprovement(10, 2, 10e4, -1.0, 1.0, 10.0) - # mean_ = np.array([0.5, 0.23]) - # preference_ = [False, True] - # likelihood_cov = acquisition.likelihood(preference_) - # - # acquisition.plot_2D(acquisition.proposal_mean, acquisition.proposal_cov) - # acquisition.plot_2D(mean_, likelihood_cov) - # - # acquisition.update_proposal_model(mean_, preference_) - # acquisition.plot_2D(acquisition.proposal_mean, acquisition.proposal_cov) - # - # mean2 = np.array([0.33, 0.24]) - # preference2 = [True, False] - # likelihood_cov2 = acquisition.likelihood(preference2) - # - # acquisition.plot_2D(mean2, likelihood_cov2) - # - # acquisition.update_proposal_model(mean2, preference2) - # acquisition.plot_2D(acquisition.proposal_mean, acquisition.proposal_cov) - # - # mean2 = np.array([-0.66, -0.5]) - # preference2 = [True, True] - # likelihood_cov2 = acquisition.likelihood(preference2) - # - # acquisition.plot_2D(mean2, likelihood_cov2) - # - # acquisition.update_proposal_model(mean2, preference2) - # acquisition.plot_2D(acquisition.proposal_mean, acquisition.proposal_cov) + # acquisition = PreferenceExpectedImprovement(10, 10, 10e4, -1.0, 1.0, 5.0) + # sample_res = acquisition.rejection_sampling() + # print(f"finished: {sample_res}") + acquisition = PreferenceExpectedImprovement(10, 2, 10e4, -1.0, 1.0, 10.0) + mean_ = np.array([0.5, 0.23]) + preference_ = [False, True] + likelihood_cov = acquisition.likelihood(preference_) + + acquisition.plot_2D(acquisition.proposal_mean, acquisition.proposal_cov) + acquisition.plot_2D(mean_, likelihood_cov) + + acquisition.update_proposal_model(mean_, preference_) + acquisition.plot_2D(acquisition.proposal_mean, acquisition.proposal_cov) + + mean2 = np.array([0.33, 0.24]) + preference2 = [True, False] + likelihood_cov2 = acquisition.likelihood(preference2) + + acquisition.plot_2D(mean2, likelihood_cov2) + + acquisition.update_proposal_model(mean2, preference2) + acquisition.plot_2D(acquisition.proposal_mean, acquisition.proposal_cov) + + mean2 = np.array([-0.66, -0.5]) + preference2 = [False, False] + likelihood_cov2 = acquisition.likelihood(preference2) + + acquisition.plot_2D(mean2, likelihood_cov2) + + acquisition.update_proposal_model(mean2, preference2) + acquisition.update_proposal_model(mean2, preference2) + acquisition.update_proposal_model(mean2, preference2) + acquisition.update_proposal_model(mean2, preference2) + acquisition.update_proposal_model(mean2, preference2) + acquisition.plot_2D(acquisition.proposal_mean, acquisition.proposal_cov) diff --git a/plotter/reward_plotter.py b/plotter/reward_plotter.py index 8809bb3..ffeaa75 100644 --- a/plotter/reward_plotter.py +++ b/plotter/reward_plotter.py @@ -35,11 +35,13 @@ def plot_csv(paths, x_axis, y_axis): if __name__ == '__main__': - filenames = ['cp-ei-bo--20-1687348670_183786.csv', - 'cp-pei-regular-20_0-20-1690217019_225133.csv', + filenames = ['cp-e150r10-bf15-base/cp-ei-random-1_0-15-1690282051_2959082.csv', + 'cp-e150r10-bf15-noshaping/cp-pei-random-0_95-15-1690276946_1944933.csv', + 'cp-e150r10-bf15-noshaping/cp-pei-regular-25_0-15-1690290021_6843266.csv', + 'cp-e150r10-bf15-noshaping/cp-pei-improvement-0_1-15-1690292664_0382216.csv', ] home_dir = os.path.expanduser('~') - file_path = os.path.join(home_dir, 'Documents/IntRLResults/cp-e100r10-bf20') + file_path = os.path.join(home_dir, 'Documents/IntRLResults/CP-Results') paths = [os.path.join(file_path, filename) for filename in filenames] plot_csv(paths, 'Episodes', 'Reward') # diff --git a/runner/BOReacher.py b/runner/BOReacher.py new file mode 100644 index 0000000..44afefc --- /dev/null +++ b/runner/BOReacher.py @@ -0,0 +1,67 @@ +# Control Suite +from dm_control import suite + +# General +import copy +import numpy as np + +# Graphics-related +import matplotlib.pyplot as plt + +# Bayesian Optimization +from BayesianOptimization.BOwithGym import BayesianOptimization + +import warnings +from sklearn.exceptions import ConvergenceWarning + +warnings.filterwarnings("ignore", category=ConvergenceWarning) + +random_state = np.random.RandomState() +env = suite.load('reacher', 'hard', task_kwargs={'random': random_state}) + +nr_steps = 100 +nr_runs = 10 +iteration_steps = 50 +acquisition_fun = 'ei' + +# storage arrays +finished_store = np.zeros((1, nr_runs)) +best_policy = np.zeros((nr_steps, nr_runs, 2)) +reward_store = np.zeros((iteration_steps, nr_runs)) + +spec = env.action_spec() +time_step = env.reset() + +def main(): + global finished_store, best_policy, reward_store + + +for i in range(nr_steps): + action = random_state.uniform(spec.minimum, spec.maximum, spec.shape) + time_step = env.step(action) + + camera0 = env.physics.render(camera_id=0, height=400, width=600) + frames.append(camera0) # Directly append the frame without any modification + rewards.append(time_step.reward) + observations.append(copy.deepcopy(time_step.observation)) + ticks.append(env.physics.data.time) + +# Show video and plot reward and observations +for i in range(len(frames)): + if i % 20 == 0: # Display every 20th frame for example purposes + print(frames[i].shape) + fig, ax = plt.subplots(1, 1) + ax.imshow(frames[i]) + ax.axis('off') # Turn off the axis + + # Remove any whitespace from the edges + ax.set_xticks([]) + ax.set_yticks([]) + plt.subplots_adjust(left=0, right=1, top=1, bottom=0, wspace=0, hspace=0) + plt.margins(0, 0) + plt.gca().xaxis.set_major_locator(plt.NullLocator()) + plt.gca().yaxis.set_major_locator(plt.NullLocator()) + + plt.show() + +