# ActiveBOToytask/runner/BOReacher.py

# Control Suite
from dm_control import suite

# General
import copy
import numpy as np

# Graphics-related
import matplotlib.pyplot as plt

# Bayesian Optimization
from BayesianOptimization.BOwithGym import BayesianOptimization

import warnings
from sklearn.exceptions import ConvergenceWarning

# Silence scikit-learn ConvergenceWarning messages for the whole process.
# NOTE(review): presumably these come from model fitting inside the
# BayesianOptimization package — confirm before relying on the filter.
warnings.filterwarnings("ignore", category=ConvergenceWarning)

# Shared random number generator. No seed is passed, so runs are not
# reproducible; the same generator is handed to the task and is later used
# to sample actions.
random_state = np.random.RandomState()
# dm_control "reacher" domain, "hard" task, driven by the generator above.
env = suite.load('reacher', 'hard', task_kwargs={'random': random_state})

# Experiment configuration.
nr_steps = 100          # environment steps per rollout (also first axis of best_policy)
nr_runs = 10            # second axis of the storage arrays below
iteration_steps = 50    # first axis of reward_store
# NOTE(review): not used in the visible code; 'ei' presumably selects the
# expected-improvement acquisition function of BayesianOptimization — confirm.
acquisition_fun = 'ei'

# Zero-initialised result buffers; nothing in the visible code writes to them.
finished_store = np.zeros((1, nr_runs))
# NOTE(review): trailing axis of size 2 presumably matches the reacher's
# two-dimensional action — confirm against env.action_spec().
best_policy = np.zeros((nr_steps, nr_runs, 2))
reward_store = np.zeros((iteration_steps, nr_runs))

# Action bounds/shape used for sampling, and the first time step of a fresh episode.
spec = env.action_spec()
time_step = env.reset()

def main():
    """Placeholder entry point.

    Only declares the module-level result arrays as globals; no other work
    is performed, so a call simply returns ``None``.

    NOTE(review): nothing in the visible part of the file calls this
    function, and the code that follows it runs at module level.
    """
    global finished_store
    global best_policy
    global reward_store


# Per-step logs of the rollout. They have to exist before the loop appends to
# them; without these initialisations the first append raises NameError.
frames = []        # rendered camera images, one per step
rewards = []       # reward reported by each env.step call
observations = []  # deep copies of each step's observation
ticks = []         # simulation time (env.physics.data.time) after each step

# Drive the environment for nr_steps steps with uniformly random actions
# sampled inside the bounds given by the action spec.
for _ in range(nr_steps):
    action = random_state.uniform(spec.minimum, spec.maximum, spec.shape)
    time_step = env.step(action)

    # Render camera 0 at 400x600 and store the frame unmodified.
    camera0 = env.physics.render(camera_id=0, height=400, width=600)
    frames.append(camera0)
    rewards.append(time_step.reward)
    # Deep copy so the stored record is independent of the time step object
    # (presumably the environment may reuse observation buffers — confirm).
    observations.append(copy.deepcopy(time_step.observation))
    ticks.append(env.physics.data.time)

# Preview the rollout: display every 20th rendered frame (indices 0, 20, 40, ...)
# as a borderless image, one figure per frame.
for frame in frames[::20]:
    print(frame.shape)
    fig, ax = plt.subplots(1, 1)
    ax.imshow(frame)
    ax.axis('off')  # hide the axis frame and labels

    # Drop ticks, padding and margins so only the image remains.
    ax.set_xticks([])
    ax.set_yticks([])
    plt.subplots_adjust(
        left=0, right=1, top=1, bottom=0, wspace=0, hspace=0
    )
    plt.margins(0, 0)
    current_axes = plt.gca()
    current_axes.xaxis.set_major_locator(plt.NullLocator())
    current_axes.yaxis.set_major_locator(plt.NullLocator())

    plt.show()