ActiveBOToytask/PolicyModel/GaussianModelMultiDim.py
2023-08-24 14:23:41 +02:00

50 lines
1.6 KiB
Python

import numpy as np
class GaussianPolicy:
def __init__(self, nr_steps, nr_weights, nr_dims, lowerb=-1.0, upperb=1.0, seed=None):
self.nr_weights = nr_weights
self.nr_steps = nr_steps
self.nr_dims = nr_dims
self.weights = None
self.trajectory = None
self.lowerb = lowerb
self.upperb = upperb
self.rng = np.random.default_rng(seed=seed)
# initialize
self.mid_points = np.linspace(0, self.nr_steps, self.nr_weights)
if nr_weights > 1:
self.std = self.mid_points[1] / (2 * np.sqrt(2 * np.log(2))) # Full width at half maximum
else:
self.std = self.nr_steps / 2
self.reset()
def reset(self):
self.weights = np.zeros((self.nr_weights, self.nr_dims))
self.trajectory = np.zeros((self.nr_steps, self.nr_dims))
def random_weights(self):
for dim in range(self.nr_dims):
self.weights[:, dim] = self.rng.uniform(self.lowerb, self.upperb, self.nr_weights)
def rollout(self):
self.trajectory = np.zeros((self.nr_steps, self.nr_dims))
for step in range(self.nr_steps):
for weight in range(self.nr_weights):
base_fun = np.exp(-0.5 * (step - self.mid_points[weight]) ** 2 / self.std ** 2)
for dim in range(self.nr_dims):
self.trajectory[step, dim] += base_fun * self.weights[weight, dim]
return self.trajectory
def set_weights(self, x):
self.weights = x.reshape(self.nr_weights, self.nr_dims)
def get_x(self):
return self.weights.reshape(self.nr_weights * self.nr_dims, )