Add the dm_control Reacher task as an environment

This commit is contained in:
Niko Feith 2023-08-17 16:14:58 +02:00
parent c3271fe6d2
commit 886514f9e6
3 changed files with 109 additions and 36 deletions

View File

@ -68,7 +68,7 @@ class PreferenceExpectedImprovement:
def likelihood(self, preference): def likelihood(self, preference):
covariance_diag = np.ones((self.nr_dims,)) * self.initial_variance covariance_diag = np.ones((self.nr_dims,)) * self.initial_variance
covariance_diag[preference] = 0.1 covariance_diag[preference] = 0.05
covariance = np.diag(covariance_diag) covariance = np.diag(covariance_diag)
@ -77,7 +77,7 @@ class PreferenceExpectedImprovement:
def update_proposal_model(self, preference_mean, preference_bool): def update_proposal_model(self, preference_mean, preference_bool):
covariance_diag = np.ones((self.nr_dims,)) * self.initial_variance covariance_diag = np.ones((self.nr_dims,)) * self.initial_variance
covariance_diag[preference_bool] = 0.1 covariance_diag[preference_bool] = 0.05
preference_cov = np.diag(covariance_diag) preference_cov = np.diag(covariance_diag)
@ -116,34 +116,38 @@ class PreferenceExpectedImprovement:
if __name__ == '__main__': if __name__ == '__main__':
acquisition = PreferenceExpectedImprovement(10, 10, 10e4, -1.0, 1.0, 5.0) # acquisition = PreferenceExpectedImprovement(10, 10, 10e4, -1.0, 1.0, 5.0)
sample_res = acquisition.rejection_sampling() # sample_res = acquisition.rejection_sampling()
print(f"finished: {sample_res}") # print(f"finished: {sample_res}")
# acquisition = PreferenceExpectedImprovement(10, 2, 10e4, -1.0, 1.0, 10.0) acquisition = PreferenceExpectedImprovement(10, 2, 10e4, -1.0, 1.0, 10.0)
# mean_ = np.array([0.5, 0.23]) mean_ = np.array([0.5, 0.23])
# preference_ = [False, True] preference_ = [False, True]
# likelihood_cov = acquisition.likelihood(preference_) likelihood_cov = acquisition.likelihood(preference_)
#
# acquisition.plot_2D(acquisition.proposal_mean, acquisition.proposal_cov) acquisition.plot_2D(acquisition.proposal_mean, acquisition.proposal_cov)
# acquisition.plot_2D(mean_, likelihood_cov) acquisition.plot_2D(mean_, likelihood_cov)
#
# acquisition.update_proposal_model(mean_, preference_) acquisition.update_proposal_model(mean_, preference_)
# acquisition.plot_2D(acquisition.proposal_mean, acquisition.proposal_cov) acquisition.plot_2D(acquisition.proposal_mean, acquisition.proposal_cov)
#
# mean2 = np.array([0.33, 0.24]) mean2 = np.array([0.33, 0.24])
# preference2 = [True, False] preference2 = [True, False]
# likelihood_cov2 = acquisition.likelihood(preference2) likelihood_cov2 = acquisition.likelihood(preference2)
#
# acquisition.plot_2D(mean2, likelihood_cov2) acquisition.plot_2D(mean2, likelihood_cov2)
#
# acquisition.update_proposal_model(mean2, preference2) acquisition.update_proposal_model(mean2, preference2)
# acquisition.plot_2D(acquisition.proposal_mean, acquisition.proposal_cov) acquisition.plot_2D(acquisition.proposal_mean, acquisition.proposal_cov)
#
# mean2 = np.array([-0.66, -0.5]) mean2 = np.array([-0.66, -0.5])
# preference2 = [True, True] preference2 = [False, False]
# likelihood_cov2 = acquisition.likelihood(preference2) likelihood_cov2 = acquisition.likelihood(preference2)
#
# acquisition.plot_2D(mean2, likelihood_cov2) acquisition.plot_2D(mean2, likelihood_cov2)
#
# acquisition.update_proposal_model(mean2, preference2) acquisition.update_proposal_model(mean2, preference2)
# acquisition.plot_2D(acquisition.proposal_mean, acquisition.proposal_cov) acquisition.update_proposal_model(mean2, preference2)
acquisition.update_proposal_model(mean2, preference2)
acquisition.update_proposal_model(mean2, preference2)
acquisition.update_proposal_model(mean2, preference2)
acquisition.plot_2D(acquisition.proposal_mean, acquisition.proposal_cov)

View File

@ -35,11 +35,13 @@ def plot_csv(paths, x_axis, y_axis):
if __name__ == '__main__': if __name__ == '__main__':
filenames = ['cp-ei-bo--20-1687348670_183786.csv', filenames = ['cp-e150r10-bf15-base/cp-ei-random-1_0-15-1690282051_2959082.csv',
'cp-pei-regular-20_0-20-1690217019_225133.csv', 'cp-e150r10-bf15-noshaping/cp-pei-random-0_95-15-1690276946_1944933.csv',
'cp-e150r10-bf15-noshaping/cp-pei-regular-25_0-15-1690290021_6843266.csv',
'cp-e150r10-bf15-noshaping/cp-pei-improvement-0_1-15-1690292664_0382216.csv',
] ]
home_dir = os.path.expanduser('~') home_dir = os.path.expanduser('~')
file_path = os.path.join(home_dir, 'Documents/IntRLResults/cp-e100r10-bf20') file_path = os.path.join(home_dir, 'Documents/IntRLResults/CP-Results')
paths = [os.path.join(file_path, filename) for filename in filenames] paths = [os.path.join(file_path, filename) for filename in filenames]
plot_csv(paths, 'Episodes', 'Reward') plot_csv(paths, 'Episodes', 'Reward')
# #

67
runner/BOReacher.py Normal file
View File

@ -0,0 +1,67 @@
# Control Suite
from dm_control import suite
# General
import copy
import numpy as np
# Graphics-related
import matplotlib.pyplot as plt
# Bayesian Optimization
from BayesianOptimization.BOwithGym import BayesianOptimization
import warnings
from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings("ignore", category=ConvergenceWarning)
# A single RandomState instance is passed to the task and is also the source
# of the random actions drawn in main().
random_state = np.random.RandomState()
env = suite.load(
    'reacher',
    'hard',
    task_kwargs={'random': random_state},
)

# Experiment sizes: nr_steps bounds the rollout loop in main(); all three
# values size the storage arrays below.
nr_steps, nr_runs, iteration_steps = 100, 10, 50
# NOTE(review): presumably selects the expected-improvement acquisition
# function; it is not read anywhere in this section -- confirm.
acquisition_fun = 'ei'

# Zero-initialised storage arrays.
finished_store = np.zeros(shape=(1, nr_runs))
best_policy = np.zeros(shape=(nr_steps, nr_runs, 2))
reward_store = np.zeros(shape=(iteration_steps, nr_runs))

# Action bounds/shape used for sampling, and the initial reset of the task.
spec = env.action_spec()
time_step = env.reset()
def main():
    """Run one random-action rollout on the environment and display frames.

    Steps the module-level ``env`` for ``nr_steps`` steps, drawing each action
    uniformly between ``spec.minimum`` and ``spec.maximum``. For every step
    the rendered camera image, the reward, a deep copy of the observation and
    the physics time are recorded. Afterwards every 20th recorded frame is
    shown in its own matplotlib figure with axes, ticks and margins removed.

    NOTE(review): no call to ``main()`` is visible in this part of the file;
    confirm the script has an entry point.
    """
    # Kept from the existing code; none of these arrays is assigned here yet.
    global finished_store, best_policy, reward_store

    # Per-step logs; they must exist before the rollout loop appends to them.
    frames = []
    rewards = []
    observations = []
    ticks = []

    for _ in range(nr_steps):
        action = random_state.uniform(spec.minimum, spec.maximum, spec.shape)
        time_step = env.step(action)

        # Store the rendered frame as-is, without any modification.
        frames.append(env.physics.render(camera_id=0, height=400, width=600))
        rewards.append(time_step.reward)
        # Deep copy so later steps cannot alter the stored observation.
        observations.append(copy.deepcopy(time_step.observation))
        ticks.append(env.physics.data.time)

    # Display every 20th frame (indices 0, 20, 40, ...).
    for frame in frames[::20]:
        print(frame.shape)
        _fig, ax = plt.subplots(1, 1)
        ax.imshow(frame)
        ax.axis('off')  # Turn off the axis
        # Remove any whitespace from the edges
        ax.set_xticks([])
        ax.set_yticks([])
        plt.subplots_adjust(left=0, right=1, top=1, bottom=0, wspace=0, hspace=0)
        plt.margins(0, 0)
        plt.gca().xaxis.set_major_locator(plt.NullLocator())
        plt.gca().yaxis.set_major_locator(plt.NullLocator())
        plt.show()