# Source: ActiveBOToytask/runner/BOReacher.py (2023-08-17 16:14:58 +02:00; 68 lines, 1.8 KiB, Python)

# Control Suite
from dm_control import suite
# General
import copy
import numpy as np
# Graphics-related
import matplotlib.pyplot as plt
# Bayesian Optimization
from BayesianOptimization.BOwithGym import BayesianOptimization
import warnings
from sklearn.exceptions import ConvergenceWarning
# Silence scikit-learn ConvergenceWarning messages for the whole process.
warnings.filterwarnings("ignore", category=ConvergenceWarning)
# Unseeded generator: it is handed to the task below and also used by main()
# to sample actions, so results are not reproducible between runs.
random_state = np.random.RandomState()
# dm_control 'reacher' domain, 'hard' task, with the generator above passed as
# the task's `random` argument.
env = suite.load('reacher', 'hard', task_kwargs={'random': random_state})
# Number of environment steps main() executes; also the first axis of
# best_policy.
nr_steps = 100
# Second axis of every storage array below.
nr_runs = 10
# First axis of reward_store.
iteration_steps = 50
# Not read anywhere in the visible code; presumably the acquisition-function
# key ('ei' = expected improvement?) for BayesianOptimization -- TODO confirm.
acquisition_fun = 'ei'
# Storage arrays, zero-initialised. Nothing in the visible code writes to them.
# finished_store: shape (1, nr_runs)
finished_store = np.zeros((1, nr_runs))
# best_policy: shape (nr_steps, nr_runs, 2); the trailing 2 is presumably the
# reacher action dimension -- TODO confirm against spec.shape.
best_policy = np.zeros((nr_steps, nr_runs, 2))
# reward_store: shape (iteration_steps, nr_runs)
reward_store = np.zeros((iteration_steps, nr_runs))
# Action specification; main() reads its minimum, maximum and shape.
spec = env.action_spec()
# Reset the environment once at import time. main() assigns its own local
# `time_step`, so this module-level value is not read by it.
time_step = env.reset()
def main():
    """Roll out a uniformly random policy on the reacher task and show frames.

    Steps the module-level ``env`` for ``nr_steps`` steps, drawing every
    action uniformly between ``spec.minimum`` and ``spec.maximum`` with the
    module-level ``random_state``. After each step the camera-0 image, the
    reward, a deep copy of the observation and the physics time are recorded.
    Every 20th recorded frame is then printed (its shape) and displayed in its
    own borderless matplotlib figure.

    Returns:
        None.
    """
    # Kept from the original so these module-level stores can be rebound from
    # inside this function; nothing in this section assigns to them.
    global finished_store, best_policy, reward_store

    # Per-step logs. These were appended to without ever being created, which
    # raised NameError on the first iteration.
    frames = []
    rewards = []
    observations = []
    ticks = []

    for _ in range(nr_steps):
        action = random_state.uniform(spec.minimum, spec.maximum, spec.shape)
        time_step = env.step(action)
        camera0 = env.physics.render(camera_id=0, height=400, width=600)
        frames.append(camera0)  # stored as rendered, without modification
        rewards.append(time_step.reward)
        # Deep copy so the stored observation is independent of the object
        # held by the time step.
        observations.append(copy.deepcopy(time_step.observation))
        ticks.append(env.physics.data.time)

    # Show every 20th frame (indices 0, 20, 40, ...). The rewards,
    # observations and ticks logs are collected above but not plotted here.
    for frame in frames[::20]:
        print(frame.shape)
        fig, ax = plt.subplots(1, 1)
        ax.imshow(frame)
        ax.axis('off')  # hide the axis frame and labels
        # Remove any whitespace from the edges
        ax.set_xticks([])
        ax.set_yticks([])
        plt.subplots_adjust(left=0, right=1, top=1, bottom=0, wspace=0, hspace=0)
        plt.margins(0, 0)
        ax.xaxis.set_major_locator(plt.NullLocator())
        ax.yaxis.set_major_locator(plt.NullLocator())
        plt.show()
        # Release the figure so repeated displays do not accumulate open
        # figures when the backend's show() does not block.
        plt.close(fig)