50 lines
1.6 KiB
Python
50 lines
1.6 KiB
Python
|
import numpy as np
|
||
|
|
||
|
|
||
|
class GaussianPolicy:
|
||
|
def __init__(self, nr_steps, nr_weights, nr_dims, lowerb=-1.0, upperb=1.0, seed=None):
|
||
|
self.nr_weights = nr_weights
|
||
|
self.nr_steps = nr_steps
|
||
|
self.nr_dims = nr_dims
|
||
|
|
||
|
self.weights = None
|
||
|
self.trajectory = None
|
||
|
|
||
|
self.lowerb = lowerb
|
||
|
self.upperb = upperb
|
||
|
|
||
|
self.rng = np.random.default_rng(seed=seed)
|
||
|
|
||
|
# initialize
|
||
|
self.mid_points = np.linspace(0, self.nr_steps, self.nr_weights)
|
||
|
if nr_weights > 1:
|
||
|
self.std = self.mid_points[1] / (2 * np.sqrt(2 * np.log(2))) # Full width at half maximum
|
||
|
else:
|
||
|
self.std = self.nr_steps / 2
|
||
|
|
||
|
self.reset()
|
||
|
|
||
|
def reset(self):
|
||
|
self.weights = np.zeros((self.nr_weights, self.nr_dims))
|
||
|
self.trajectory = np.zeros((self.nr_steps, self.nr_dims))
|
||
|
|
||
|
def random_weights(self):
|
||
|
for dim in range(self.nr_dims):
|
||
|
self.weights[:, dim] = self.rng.uniform(self.lowerb, self.upperb, self.nr_weights)
|
||
|
|
||
|
def rollout(self):
|
||
|
self.trajectory = np.zeros((self.nr_steps, self.nr_dims))
|
||
|
for step in range(self.nr_steps):
|
||
|
for weight in range(self.nr_weights):
|
||
|
base_fun = np.exp(-0.5 * (step - self.mid_points[weight]) ** 2 / self.std ** 2)
|
||
|
for dim in range(self.nr_dims):
|
||
|
self.trajectory[step, dim] += base_fun * self.weights[weight, dim]
|
||
|
|
||
|
return self.trajectory
|
||
|
|
||
|
def set_weights(self, x):
|
||
|
self.weights = x.reshape(self.nr_weights, self.nr_dims)
|
||
|
|
||
|
def get_x(self):
|
||
|
return self.weights.reshape(self.nr_weights * self.nr_dims, 1)
|