75 lines
2.7 KiB
Python
Executable File
75 lines
2.7 KiB
Python
Executable File
# Copyright 2018 The dm_control Authors.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# ============================================================================
|
|
|
|
"""Wrapper control suite environments that adds Gaussian noise to actions."""
|
|
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
|
|
import dm_env
|
|
import numpy as np
|
|
|
|
|
|
_BOUNDS_MUST_BE_FINITE = (
|
|
'All bounds in `env.action_spec()` must be finite, got: {action_spec}')
|
|
|
|
|
|
class Wrapper(dm_env.Environment):
|
|
"""Wraps a control environment and adds Gaussian noise to actions."""
|
|
|
|
def __init__(self, env, scale=0.01):
|
|
"""Initializes a new action noise Wrapper.
|
|
|
|
Args:
|
|
env: The control suite environment to wrap.
|
|
scale: The standard deviation of the noise, expressed as a fraction
|
|
of the max-min range for each action dimension.
|
|
|
|
Raises:
|
|
ValueError: If any of the action dimensions of the wrapped environment are
|
|
unbounded.
|
|
"""
|
|
action_spec = env.action_spec()
|
|
if not (np.all(np.isfinite(action_spec.minimum)) and
|
|
np.all(np.isfinite(action_spec.maximum))):
|
|
raise ValueError(_BOUNDS_MUST_BE_FINITE.format(action_spec=action_spec))
|
|
self._minimum = action_spec.minimum
|
|
self._maximum = action_spec.maximum
|
|
self._noise_std = scale * (action_spec.maximum - action_spec.minimum)
|
|
self._env = env
|
|
|
|
def step(self, action):
|
|
noisy_action = action + self._env.task.random.normal(scale=self._noise_std)
|
|
# Clip the noisy actions in place so that they fall within the bounds
|
|
# specified by the `action_spec`. Note that MuJoCo implicitly clips out-of-
|
|
# bounds control inputs, but we also clip here in case the actions do not
|
|
# correspond directly to MuJoCo actuators, or if there are other wrapper
|
|
# layers that expect the actions to be within bounds.
|
|
np.clip(noisy_action, self._minimum, self._maximum, out=noisy_action)
|
|
return self._env.step(noisy_action)
|
|
|
|
def reset(self):
|
|
return self._env.reset()
|
|
|
|
def observation_spec(self):
|
|
return self._env.observation_spec()
|
|
|
|
def action_spec(self):
|
|
return self._env.action_spec()
|
|
|
|
def __getattr__(self, name):
|
|
return getattr(self._env, name)
|