ActiveBOToytask/runner/BODmRunner.py

# Control Suite
from dm_control import suite
# General
import copy
import numpy as np
# Graphics-related
import matplotlib.pyplot as plt
# Bayesian Optimization
from BayesianOptimization.BOwithDM import BayesianOptimization
import warnings
from sklearn.exceptions import ConvergenceWarning
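# The GP fit inside BayesianOptimization appears to use scikit-learn and can
# emit ConvergenceWarning during hyperparameter optimization; silence it to
# keep the console output readable.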
warnings.filterwarnings("ignore", category=ConvergenceWarning)
seed = None
random_state = np.random.RandomState(seed=seed)
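
# Load the dm_control "finger" domain with the "turn_easy" task; passing the
# RandomState through task_kwargs seeds the task's internal randomness
# (seed=None makes each run non-deterministic).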
env = suite.load('finger', 'turn_easy', task_kwargs={'random': random_state})
spec = env.action_spec()
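# The action spec is a BoundedArray; its shape gives the number of actuated
# dimensions (2 for the finger domain), used below as nr_dims.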
print(spec)
time_step = env.reset()
nr_steps = 100
nr_runs = 1
nr_dims = spec.shape[0]
iteration_steps = 10
acquisition_fun = "Expected Improvement"
nr_weights = 15
nr_inits = 3
# storage arrays
best_weights = np.zeros((nr_runs, nr_weights, nr_dims))
best_rewards = np.zeros((nr_runs, iteration_steps))
rewards = np.zeros((nr_runs, iteration_steps))
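
# post_processing aggregates the per-run learning curves: mean and standard
# deviation across runs, one value per BO iteration.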
def post_processing(reward):
    reward_mean = np.mean(reward, axis=0)
    reward_std = np.std(reward, axis=0)
    return reward_mean, reward_std

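# plot_reward draws the mean learning curve with a +/- 1.96 * std band
# (an approximate 95% interval under a normality assumption).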
def plot_reward(mean, std):
    eps = np.linspace(0, mean.shape[0], mean.shape[0])
    plt.plot(eps, mean)
    plt.fill_between(
        eps,
        mean - 1.96 * std,
        mean + 1.96 * std,
        alpha=0.5
    )
    plt.show()

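# runner executes one episode with a fixed, open-loop policy (one action per
# step). The reward is shaped: -1 for every step that yields no environment
# reward, plus 10x the first non-zero environment reward, after which the
# episode terminates early.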
def runner(env_, policy_):
    reward = 0
    env_.reset()
    for step in range(nr_steps):
        action = policy_[step]
        output = env_.step(action)
        print(output.reward)
        if output.reward != 0:
            reward += output.reward * 10
            break

        reward += -1.0

    return reward

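# main: per run, evaluate nr_inits random policies to initialize the surrogate
# model, then run iteration_steps of Bayesian optimization, where each step
# proposes new weights via the acquisition function, rolls the policy out, and
# records both the raw reward and the best reward seen so far (best_rewards
# feeds the final plot).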
def main():
    global best_weights, rewards
    bo = BayesianOptimization(nr_steps, nr_dims, nr_weights, acq=acquisition_fun, seed=seed)

    for run in range(nr_runs):
        bo.reset_bo()
        print(f'Run: {run}')

        # initialization
        for init in range(nr_inits):
            bo.policy_model.random_weights()
            policy = bo.policy_model.rollout()
            reward = runner(env, policy)

            x = bo.policy_model.get_x()
            x = x.reshape(nr_weights * nr_dims, )
            bo.add_observation(reward, x)

        # Bayesian Optimization
        for n in range(iteration_steps):
            x_next = bo.next_observation()
            bo.policy_model.set_weights(x_next)
            policy = bo.policy_model.rollout()
            reward = runner(env, policy)

            x_next = x_next.reshape(nr_weights * nr_dims, )
            bo.add_observation(reward, x_next)
            rewards[run, n] = reward

            y_max, _, _ = bo.get_best_result()
            best_rewards[run, n] = y_max

    reward_mean, reward_std = post_processing(best_rewards)
    plot_reward(reward_mean, reward_std)


if __name__ == '__main__':
    main()

# for i in range(nr_steps):
# action = random_state.uniform(spec.minimum, spec.maximum, spec.shape)
# time_step = env.step(action)
#
# camera0 = env.physics.render(camera_id=0, height=400, width=600)
# frames.append(camera0) # Directly append the frame without any modification
# rewards.append(time_step.reward)
# observations.append(copy.deepcopy(time_step.observation))
# ticks.append(env.physics.data.time)
#
# # Show video and plot reward and observations
# for i in range(len(frames)):
# if i % 20 == 0: # Display every 20th frame for example purposes
# print(frames[i].shape)
# fig, ax = plt.subplots(1, 1)
# ax.imshow(frames[i])
# ax.axis('off') # Turn off the axis
#
# # Remove any whitespace from the edges
# ax.set_xticks([])
# ax.set_yticks([])
# plt.subplots_adjust(left=0, right=1, top=1, bottom=0, wspace=0, hspace=0)
# plt.margins(0, 0)
# plt.gca().xaxis.set_major_locator(plt.NullLocator())
# plt.gca().yaxis.set_major_locator(plt.NullLocator())
#
# plt.show()