"""Roll out a random policy in the dm_control 'reacher hard' task and display frames.

Runs `nr_steps` random actions, recording rendered frames, rewards,
observations, and simulation time, then shows every 20th frame with
matplotlib. Storage arrays for a Bayesian-optimization experiment are
allocated at module level for use elsewhere.
"""

# Control Suite
from dm_control import suite
# General
import copy
import numpy as np
# Graphics-related
import matplotlib.pyplot as plt
# Bayesian Optimization
from BayesianOptimization.BOwithGym import BayesianOptimization
import warnings
from sklearn.exceptions import ConvergenceWarning

warnings.filterwarnings("ignore", category=ConvergenceWarning)

random_state = np.random.RandomState()
env = suite.load('reacher', 'hard', task_kwargs={'random': random_state})

# Experiment configuration.
nr_steps = 100          # environment steps per rollout
nr_runs = 10            # independent repetitions
iteration_steps = 50    # BO iterations per run
acquisition_fun = 'ei'  # expected improvement

# Storage arrays (filled elsewhere; kept at module level for external access).
finished_store = np.zeros((1, nr_runs))
best_policy = np.zeros((nr_steps, nr_runs, 2))
reward_store = np.zeros((iteration_steps, nr_runs))

spec = env.action_spec()
time_step = env.reset()


def main():
    """Execute one random rollout and display every 20th rendered frame."""
    # FIX: these buffers were never initialized in the original, causing a
    # NameError on the first append.
    frames = []
    rewards = []
    observations = []
    ticks = []

    for _ in range(nr_steps):
        # Sample a uniformly random action within the action spec bounds.
        action = random_state.uniform(spec.minimum, spec.maximum, spec.shape)
        time_step = env.step(action)

        # Render the scene from camera 0 and record the step's data.
        camera0 = env.physics.render(camera_id=0, height=400, width=600)
        frames.append(camera0)
        rewards.append(time_step.reward)
        # Deep-copy: the observation dict may be mutated by subsequent steps.
        observations.append(copy.deepcopy(time_step.observation))
        ticks.append(env.physics.data.time)

    # Show every 20th frame as a borderless image.
    for idx, frame in enumerate(frames):
        if idx % 20 == 0:
            print(frame.shape)
            fig, ax = plt.subplots(1, 1)
            ax.imshow(frame)
            ax.axis('off')  # Turn off the axis
            # Remove any whitespace from the edges.
            ax.set_xticks([])
            ax.set_yticks([])
            plt.subplots_adjust(left=0, right=1, top=1, bottom=0,
                                wspace=0, hspace=0)
            plt.margins(0, 0)
            plt.gca().xaxis.set_major_locator(plt.NullLocator())
            plt.gca().yaxis.set_major_locator(plt.NullLocator())
            plt.show()


if __name__ == "__main__":
    main()