import numpy as np


class GaussianPolicy:
    """Open-loop trajectory generator built from weighted Gaussian basis functions.

    ``nr_weights`` Gaussian bumps are centred at evenly spaced positions
    between 0 and ``nr_steps`` (both inclusive).  Every output dimension has
    its own weight per bump, and the trajectory value at a step is the
    weighted sum of all bumps evaluated at that step.

    Attributes:
        nr_steps: Number of time steps in a generated trajectory.
        nr_weights: Number of Gaussian basis functions.
        nr_dims: Number of output dimensions.
        lowerb, upperb: Bounds used by :meth:`random_weights`.
        rng: ``numpy.random.Generator`` used for sampling.
        mid_points: Centres of the basis functions, shape ``(nr_weights,)``.
        std: Standard deviation shared by all basis functions.
        weights: Float array of shape ``(nr_weights, nr_dims)``.
        trajectory: Float array of shape ``(nr_steps, nr_dims)`` holding the
            result of the most recent :meth:`rollout` (zeros after a reset).
    """

    def __init__(self, nr_steps, nr_weights, nr_dims, lowerb=-1.0, upperb=1.0, seed=None):
        """Create the policy with all weights set to zero.

        Args:
            nr_steps: Number of time steps per trajectory.
            nr_weights: Number of Gaussian basis functions.
            nr_dims: Number of output dimensions.
            lowerb: Lower bound for randomly sampled weights.
            upperb: Upper bound for randomly sampled weights.
            seed: Seed forwarded to ``numpy.random.default_rng``.
        """
        self.nr_weights = nr_weights
        self.nr_steps = nr_steps
        self.nr_dims = nr_dims
        self.weights = None
        self.trajectory = None
        self.lowerb = lowerb
        self.upperb = upperb
        self.rng = np.random.default_rng(seed=seed)

        # Basis centres are spread evenly over [0, nr_steps].
        self.mid_points = np.linspace(0, self.nr_steps, self.nr_weights)
        if nr_weights > 1:
            # mid_points[0] is 0, so mid_points[1] is the spacing between
            # neighbouring centres.  Choosing the std this way makes the full
            # width at half maximum of each Gaussian equal to that spacing.
            self.std = self.mid_points[1] / (2 * np.sqrt(2 * np.log(2)))
        else:
            # A single basis function has no spacing to derive a width from.
            self.std = self.nr_steps / 2
        self.reset()

    def reset(self):
        """Set the weights and the stored trajectory back to zeros."""
        self.weights = np.zeros((self.nr_weights, self.nr_dims))
        self.trajectory = np.zeros((self.nr_steps, self.nr_dims))

    def random_weights(self):
        """Replace the weights with uniform samples from ``[lowerb, upperb)``.

        A fresh array is allocated rather than writing into the current one,
        so an array previously handed to :meth:`set_weights` is never
        modified and an integer dtype can never truncate the samples.
        """
        # Sampling as (nr_dims, nr_weights) and transposing consumes the
        # random stream one output dimension at a time.
        samples = self.rng.uniform(
            self.lowerb, self.upperb, size=(self.nr_dims, self.nr_weights)
        )
        self.weights = np.ascontiguousarray(samples.T)

    def _basis(self):
        """Return the ``(nr_steps, nr_weights)`` matrix of basis activations."""
        steps = np.arange(self.nr_steps, dtype=float)[:, np.newaxis]
        centres = np.asarray(self.mid_points, dtype=float)[np.newaxis, :]
        return np.exp(-0.5 * (steps - centres) ** 2 / self.std ** 2)

    def rollout(self):
        """Compute, store and return the trajectory for the current weights.

        Returns:
            Array of shape ``(nr_steps, nr_dims)``; the same object is also
            stored in ``self.trajectory``.
        """
        # The weighted sum over basis functions for every step and dimension
        # is a single matrix product.
        self.trajectory = self._basis() @ np.asarray(self.weights, dtype=float)
        return self.trajectory

    def set_weights(self, x):
        """Set the weights from a flat (or already shaped) array-like.

        Args:
            x: Array-like with ``nr_weights * nr_dims`` elements.  It is
                copied and converted to float, so later changes to ``x`` do
                not affect the policy and vice versa.

        Raises:
            ValueError: If ``x`` cannot be reshaped to
                ``(nr_weights, nr_dims)``.
        """
        self.weights = np.array(x, dtype=float).reshape(self.nr_weights, self.nr_dims)

    def get_x(self):
        """Return a copy of the weights as a column vector.

        Returns:
            Array of shape ``(nr_weights * nr_dims, 1)`` that does not share
            memory with the policy's weights.
        """
        return np.array(self.weights, dtype=float).reshape(
            self.nr_weights * self.nr_dims, 1
        )