68 lines
1.8 KiB
Python
68 lines
1.8 KiB
Python
# Control Suite
|
|
from dm_control import suite
|
|
|
|
# General
|
|
import copy
|
|
import numpy as np
|
|
|
|
# Graphics-related
|
|
import matplotlib.pyplot as plt
|
|
|
|
# Bayesian Optimization
|
|
from BayesianOptimization.BOwithGym import BayesianOptimization
|
|
|
|
import warnings
|
|
from sklearn.exceptions import ConvergenceWarning
|
|
|
|
# Ignore scikit-learn's ConvergenceWarning.
warnings.filterwarnings(action="ignore", category=ConvergenceWarning)

# Experiment settings.
nr_steps, nr_runs, iteration_steps = 100, 10, 50
acquisition_fun = 'ei'

# Zero-filled storage arrays, sized from the settings above.
finished_store = np.zeros(shape=(1, nr_runs))
best_policy = np.zeros(shape=(nr_steps, nr_runs, 2))
reward_store = np.zeros(shape=(iteration_steps, nr_runs))

# Reacher ("hard" task) environment; the NumPy random state is handed to the
# task and is also kept at module level for later use.
random_state = np.random.RandomState()
env = suite.load(
    domain_name='reacher',
    task_name='hard',
    task_kwargs={'random': random_state},
)

# Action specification of the environment and the time step from the first reset.
spec = env.action_spec()
time_step = env.reset()
|
|
|
|
def main():
    """Run a random-action rollout and display a sample of the rendered frames.

    Steps the module-level ``env`` ``nr_steps`` times with actions drawn
    uniformly between ``spec.minimum`` and ``spec.maximum``. After every step
    the rendered camera image, the reward, a copy of the observation and the
    physics time are recorded. Every 20th recorded frame is then printed
    (its shape) and shown in a borderless matplotlib figure.

    Returns:
        None. Rewards, observations and time stamps are collected but not
        plotted or returned here.
    """
    # Per-step recordings. These lists must exist before the rollout loop
    # appends to them; without them the first append raises NameError.
    frames = []
    rewards = []
    observations = []
    ticks = []

    for _ in range(nr_steps):
        action = random_state.uniform(spec.minimum, spec.maximum, spec.shape)
        time_step = env.step(action)

        camera0 = env.physics.render(camera_id=0, height=400, width=600)
        frames.append(camera0)  # stored exactly as rendered
        rewards.append(time_step.reward)
        # Deep copy so the stored observation is independent of the object
        # handed back by the environment.
        observations.append(copy.deepcopy(time_step.observation))
        ticks.append(env.physics.data.time)

    # Display every 20th frame (indices 0, 20, 40, ...).
    for frame in frames[::20]:
        print(frame.shape)
        _, ax = plt.subplots(1, 1)
        ax.imshow(frame)
        ax.axis('off')  # Turn off the axis

        # Remove any whitespace from the edges
        ax.set_xticks([])
        ax.set_yticks([])
        plt.subplots_adjust(left=0, right=1, top=1, bottom=0, wspace=0, hspace=0)
        ax.margins(0, 0)
        ax.xaxis.set_major_locator(plt.NullLocator())
        ax.yaxis.set_major_locator(plt.NullLocator())

        # NOTE(review): figures are shown one at a time; move this call after
        # the loop if all sampled frames should appear together - confirm.
        plt.show()
|
|
|
|
|