adding reacher as env
This commit is contained in:
parent
c3271fe6d2
commit
886514f9e6
@ -68,7 +68,7 @@ class PreferenceExpectedImprovement:
|
||||
|
||||
def likelihood(self, preference):
|
||||
covariance_diag = np.ones((self.nr_dims,)) * self.initial_variance
|
||||
covariance_diag[preference] = 0.1
|
||||
covariance_diag[preference] = 0.05
|
||||
|
||||
covariance = np.diag(covariance_diag)
|
||||
|
||||
@ -77,7 +77,7 @@ class PreferenceExpectedImprovement:
|
||||
def update_proposal_model(self, preference_mean, preference_bool):
|
||||
|
||||
covariance_diag = np.ones((self.nr_dims,)) * self.initial_variance
|
||||
covariance_diag[preference_bool] = 0.1
|
||||
covariance_diag[preference_bool] = 0.05
|
||||
|
||||
preference_cov = np.diag(covariance_diag)
|
||||
|
||||
@ -116,34 +116,38 @@ class PreferenceExpectedImprovement:
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
acquisition = PreferenceExpectedImprovement(10, 10, 10e4, -1.0, 1.0, 5.0)
|
||||
sample_res = acquisition.rejection_sampling()
|
||||
print(f"finished: {sample_res}")
|
||||
# acquisition = PreferenceExpectedImprovement(10, 2, 10e4, -1.0, 1.0, 10.0)
|
||||
# mean_ = np.array([0.5, 0.23])
|
||||
# preference_ = [False, True]
|
||||
# likelihood_cov = acquisition.likelihood(preference_)
|
||||
#
|
||||
# acquisition.plot_2D(acquisition.proposal_mean, acquisition.proposal_cov)
|
||||
# acquisition.plot_2D(mean_, likelihood_cov)
|
||||
#
|
||||
# acquisition.update_proposal_model(mean_, preference_)
|
||||
# acquisition.plot_2D(acquisition.proposal_mean, acquisition.proposal_cov)
|
||||
#
|
||||
# mean2 = np.array([0.33, 0.24])
|
||||
# preference2 = [True, False]
|
||||
# likelihood_cov2 = acquisition.likelihood(preference2)
|
||||
#
|
||||
# acquisition.plot_2D(mean2, likelihood_cov2)
|
||||
#
|
||||
# acquisition.update_proposal_model(mean2, preference2)
|
||||
# acquisition.plot_2D(acquisition.proposal_mean, acquisition.proposal_cov)
|
||||
#
|
||||
# mean2 = np.array([-0.66, -0.5])
|
||||
# preference2 = [True, True]
|
||||
# likelihood_cov2 = acquisition.likelihood(preference2)
|
||||
#
|
||||
# acquisition.plot_2D(mean2, likelihood_cov2)
|
||||
#
|
||||
# acquisition.update_proposal_model(mean2, preference2)
|
||||
# acquisition.plot_2D(acquisition.proposal_mean, acquisition.proposal_cov)
|
||||
# acquisition = PreferenceExpectedImprovement(10, 10, 10e4, -1.0, 1.0, 5.0)
|
||||
# sample_res = acquisition.rejection_sampling()
|
||||
# print(f"finished: {sample_res}")
|
||||
acquisition = PreferenceExpectedImprovement(10, 2, 10e4, -1.0, 1.0, 10.0)
|
||||
mean_ = np.array([0.5, 0.23])
|
||||
preference_ = [False, True]
|
||||
likelihood_cov = acquisition.likelihood(preference_)
|
||||
|
||||
acquisition.plot_2D(acquisition.proposal_mean, acquisition.proposal_cov)
|
||||
acquisition.plot_2D(mean_, likelihood_cov)
|
||||
|
||||
acquisition.update_proposal_model(mean_, preference_)
|
||||
acquisition.plot_2D(acquisition.proposal_mean, acquisition.proposal_cov)
|
||||
|
||||
mean2 = np.array([0.33, 0.24])
|
||||
preference2 = [True, False]
|
||||
likelihood_cov2 = acquisition.likelihood(preference2)
|
||||
|
||||
acquisition.plot_2D(mean2, likelihood_cov2)
|
||||
|
||||
acquisition.update_proposal_model(mean2, preference2)
|
||||
acquisition.plot_2D(acquisition.proposal_mean, acquisition.proposal_cov)
|
||||
|
||||
mean2 = np.array([-0.66, -0.5])
|
||||
preference2 = [False, False]
|
||||
likelihood_cov2 = acquisition.likelihood(preference2)
|
||||
|
||||
acquisition.plot_2D(mean2, likelihood_cov2)
|
||||
|
||||
acquisition.update_proposal_model(mean2, preference2)
|
||||
acquisition.update_proposal_model(mean2, preference2)
|
||||
acquisition.update_proposal_model(mean2, preference2)
|
||||
acquisition.update_proposal_model(mean2, preference2)
|
||||
acquisition.update_proposal_model(mean2, preference2)
|
||||
acquisition.plot_2D(acquisition.proposal_mean, acquisition.proposal_cov)
|
||||
|
@ -35,11 +35,13 @@ def plot_csv(paths, x_axis, y_axis):
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
filenames = ['cp-ei-bo--20-1687348670_183786.csv',
|
||||
'cp-pei-regular-20_0-20-1690217019_225133.csv',
|
||||
filenames = ['cp-e150r10-bf15-base/cp-ei-random-1_0-15-1690282051_2959082.csv',
|
||||
'cp-e150r10-bf15-noshaping/cp-pei-random-0_95-15-1690276946_1944933.csv',
|
||||
'cp-e150r10-bf15-noshaping/cp-pei-regular-25_0-15-1690290021_6843266.csv',
|
||||
'cp-e150r10-bf15-noshaping/cp-pei-improvement-0_1-15-1690292664_0382216.csv',
|
||||
]
|
||||
home_dir = os.path.expanduser('~')
|
||||
file_path = os.path.join(home_dir, 'Documents/IntRLResults/cp-e100r10-bf20')
|
||||
file_path = os.path.join(home_dir, 'Documents/IntRLResults/CP-Results')
|
||||
paths = [os.path.join(file_path, filename) for filename in filenames]
|
||||
plot_csv(paths, 'Episodes', 'Reward')
|
||||
#
|
||||
|
67
runner/BOReacher.py
Normal file
67
runner/BOReacher.py
Normal file
@ -0,0 +1,67 @@
|
||||
# Control Suite
|
||||
from dm_control import suite
|
||||
|
||||
# General
|
||||
import copy
|
||||
import numpy as np
|
||||
|
||||
# Graphics-related
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# Bayesian Optimization
|
||||
from BayesianOptimization.BOwithGym import BayesianOptimization
|
||||
|
||||
import warnings
|
||||
from sklearn.exceptions import ConvergenceWarning
|
||||
|
||||
# Suppress scikit-learn's ConvergenceWarning for the rest of the run.
warnings.filterwarnings("ignore", category=ConvergenceWarning)

# Experiment settings
nr_steps = 100
nr_runs = 10
iteration_steps = 50
acquisition_fun = 'ei'

# Result buffers, sized from the settings above
finished_store = np.zeros(shape=(1, nr_runs))
best_policy = np.zeros(shape=(nr_steps, nr_runs, 2))
reward_store = np.zeros(shape=(iteration_steps, nr_runs))

# dm_control 'reacher' task, 'hard' variant, driven by an unseeded NumPy RNG
random_state = np.random.RandomState()
env = suite.load('reacher', 'hard', task_kwargs={'random': random_state})

# Action bounds/shape used for sampling, and the initial time step
spec = env.action_spec()
time_step = env.reset()
|
||||
|
||||
def main():
    """Roll out a uniformly random policy on the environment and show frames.

    Steps the module-level ``env`` for ``nr_steps`` steps, drawing each action
    uniformly between ``spec.minimum`` and ``spec.maximum``. For every step the
    rendered camera image, the reward, a copy of the observation and the
    simulation time are recorded. Afterwards every 20th recorded frame is
    printed (its shape) and displayed with matplotlib.

    Returns:
        None. The recorded data lives in local lists only.
    """
    # Per-step buffers. The previous body appended to these names without ever
    # creating them, so the first step raised NameError.
    frames = []
    rewards = []
    observations = []
    ticks = []

    for _ in range(nr_steps):
        action = random_state.uniform(spec.minimum, spec.maximum, spec.shape)
        time_step = env.step(action)

        camera0 = env.physics.render(camera_id=0, height=400, width=600)
        frames.append(camera0)  # Directly append the frame without any modification
        rewards.append(time_step.reward)
        # Keep an independent copy of the observation for this step.
        observations.append(copy.deepcopy(time_step.observation))
        ticks.append(env.physics.data.time)

    # Show video frames
    for index, frame in enumerate(frames):
        if index % 20 != 0:  # Display every 20th frame for example purposes
            continue

        print(frame.shape)
        _, ax = plt.subplots(1, 1)
        ax.imshow(frame)
        ax.axis('off')  # Turn off the axis

        # Remove any whitespace from the edges
        ax.set_xticks([])
        ax.set_yticks([])
        plt.subplots_adjust(left=0, right=1, top=1, bottom=0, wspace=0, hspace=0)
        plt.margins(0, 0)
        plt.gca().xaxis.set_major_locator(plt.NullLocator())
        plt.gca().yaxis.set_major_locator(plt.NullLocator())

        # NOTE(review): indentation was reconstructed; showing each sampled
        # frame as it is built is assumed here -- confirm against the repo.
        plt.show()
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user