DB/local_dm_control_suite/tests/domains_test.py

# Copyright 2017 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Tests for dm_control.suite domains."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# Internal dependencies.
from absl.testing import absltest
from absl.testing import parameterized
from dm_control import suite
from dm_control.rl import control
import mock
import numpy as np
import six
from six.moves import range
from six.moves import zip


def uniform_random_policy(action_spec, random=None):
  lower_bounds = action_spec.minimum
  upper_bounds = action_spec.maximum
  # Draw values between -1 and 1 for unbounded actions.
  lower_bounds = np.where(np.isinf(lower_bounds), -1.0, lower_bounds)
  upper_bounds = np.where(np.isinf(upper_bounds), 1.0, upper_bounds)
  random_state = np.random.RandomState(random)
  def policy(time_step):
    del time_step  # Unused.
    return random_state.uniform(lower_bounds, upper_bounds)
  return policy


def step_environment(env, policy, num_episodes=5, max_steps_per_episode=10):
  for _ in range(num_episodes):
    step_count = 0
    time_step = env.reset()
    yield time_step
    while not time_step.last():
      action = policy(time_step)
      time_step = env.step(action)
      step_count += 1
      yield time_step
      if step_count >= max_steps_per_episode:
        break


def make_trajectory(domain, task, seed, **trajectory_kwargs):
  env = suite.load(domain, task, task_kwargs={'random': seed})
  policy = uniform_random_policy(env.action_spec(), random=seed)
  return step_environment(env, policy, **trajectory_kwargs)


class DomainTest(parameterized.TestCase):
  """Tests run on all the tasks registered."""

  def test_constants(self):
    num_tasks = sum(len(tasks) for tasks in
                    six.itervalues(suite.TASKS_BY_DOMAIN))

    self.assertLen(suite.ALL_TASKS, num_tasks)

  def _validate_observation(self, observation_dict, observation_spec):
    obs = observation_dict.copy()
    for name, spec in six.iteritems(observation_spec):
      arr = obs.pop(name)
      self.assertEqual(arr.shape, spec.shape)
      self.assertEqual(arr.dtype, spec.dtype)
      self.assertTrue(
          np.all(np.isfinite(arr)),
          msg='{!r} has non-finite value(s): {!r}'.format(name, arr))
    self.assertEmpty(
        obs,
        msg='Observation contains arrays(s) that are not in the spec: {!r}'
        .format(obs))

  def _validate_reward_range(self, time_step):
    if time_step.first():
      self.assertIsNone(time_step.reward)
    else:
      self.assertIsInstance(time_step.reward, float)
      self.assertBetween(time_step.reward, 0, 1)

  def _validate_discount(self, time_step):
    if time_step.first():
      self.assertIsNone(time_step.discount)
    else:
      self.assertIsInstance(time_step.discount, float)
      self.assertBetween(time_step.discount, 0, 1)

  def _validate_control_range(self, lower_bounds, upper_bounds):
    for b in lower_bounds:
      self.assertEqual(b, -1.0)
    for b in upper_bounds:
      self.assertEqual(b, 1.0)

  @parameterized.parameters(*suite.ALL_TASKS)
  def test_components_have_names(self, domain, task):
    env = suite.load(domain, task)
    model = env.physics.model

    object_types_and_size_fields = [
        ('body', 'nbody'),
        ('joint', 'njnt'),
        ('geom', 'ngeom'),
        ('site', 'nsite'),
        ('camera', 'ncam'),
        ('light', 'nlight'),
        ('mesh', 'nmesh'),
        ('hfield', 'nhfield'),
        ('texture', 'ntex'),
        ('material', 'nmat'),
        ('equality', 'neq'),
        ('tendon', 'ntendon'),
        ('actuator', 'nu'),
        ('sensor', 'nsensor'),
        ('numeric', 'nnumeric'),
        ('text', 'ntext'),
        ('tuple', 'ntuple'),
    ]
    for object_type, size_field in object_types_and_size_fields:
      for idx in range(getattr(model, size_field)):
        object_name = model.id2name(idx, object_type)
        self.assertNotEqual(object_name, '',
                            msg='Model {!r} contains unnamed {!r} with ID {}.'
                            .format(model.name, object_type, idx))

  @parameterized.parameters(*suite.ALL_TASKS)
  def test_model_has_at_least_2_cameras(self, domain, task):
    env = suite.load(domain, task)
    model = env.physics.model
    self.assertGreaterEqual(model.ncam, 2,
                            'Model {!r} should have at least 2 cameras, has {}.'
                            .format(model.name, model.ncam))

  @parameterized.parameters(*suite.ALL_TASKS)
  def test_task_conforms_to_spec(self, domain, task):
    """Tests that the environment timesteps conform to specifications."""
    is_benchmark = (domain, task) in suite.BENCHMARKING
    env = suite.load(domain, task)
    observation_spec = env.observation_spec()
    action_spec = env.action_spec()

    # Check action bounds.
    if is_benchmark:
      self._validate_control_range(action_spec.minimum, action_spec.maximum)

    # Step through the environment, applying random actions sampled within the
    # valid range and check the observations, rewards, and discounts.
    policy = uniform_random_policy(action_spec)
    for time_step in step_environment(env, policy):
      self._validate_observation(time_step.observation, observation_spec)
      self._validate_discount(time_step)
      if is_benchmark:
        self._validate_reward_range(time_step)

  @parameterized.parameters(*suite.ALL_TASKS)
  def test_environment_is_deterministic(self, domain, task):
    """Tests that identical seeds and actions produce identical trajectories."""
    seed = 0
    # Iterate over two trajectories generated using identical sequences of
    # random actions, and with identical task random states. Check that the
    # observations, rewards, discounts and step types are identical.
    trajectory1 = make_trajectory(domain=domain, task=task, seed=seed)
    trajectory2 = make_trajectory(domain=domain, task=task, seed=seed)
    for time_step1, time_step2 in zip(trajectory1, trajectory2):
      self.assertEqual(time_step1.step_type, time_step2.step_type)
      self.assertEqual(time_step1.reward, time_step2.reward)
      self.assertEqual(time_step1.discount, time_step2.discount)
      for key in six.iterkeys(time_step1.observation):
        np.testing.assert_array_equal(
            time_step1.observation[key], time_step2.observation[key],
            err_msg='Observation {!r} is not equal.'.format(key))

  def assertCorrectColors(self, physics, reward):
    colors = physics.named.model.mat_rgba
    for material_name in ('self', 'effector', 'target'):
      highlight = colors[material_name + '_highlight']
      default = colors[material_name + '_default']
      blend_coef = reward ** 4
      expected = blend_coef * highlight + (1.0 - blend_coef) * default
      actual = colors[material_name]
      err_msg = ('Material {!r} has unexpected color.\nExpected: {!r}\n'
                 'Actual: {!r}'.format(material_name, expected, actual))
      np.testing.assert_array_almost_equal(expected, actual, err_msg=err_msg)

  @parameterized.parameters(*suite.ALL_TASKS)
  def test_visualize_reward(self, domain, task):
    env = suite.load(domain, task)
    env.task.visualize_reward = True
    action = np.zeros(env.action_spec().shape)

    with mock.patch.object(env.task, 'get_reward') as mock_get_reward:
      mock_get_reward.return_value = -3.0  # Rewards < 0 should be clipped.
      env.reset()
      mock_get_reward.assert_called_with(env.physics)
      self.assertCorrectColors(env.physics, reward=0.0)

      mock_get_reward.reset_mock()
      mock_get_reward.return_value = 0.5
      env.step(action)
      mock_get_reward.assert_called_with(env.physics)
      self.assertCorrectColors(env.physics, reward=mock_get_reward.return_value)

      mock_get_reward.reset_mock()
      mock_get_reward.return_value = 2.0  # Rewards > 1 should be clipped.
      env.step(action)
      mock_get_reward.assert_called_with(env.physics)
      self.assertCorrectColors(env.physics, reward=1.0)

      mock_get_reward.reset_mock()
      mock_get_reward.return_value = 0.25
      env.reset()
      mock_get_reward.assert_called_with(env.physics)
      self.assertCorrectColors(env.physics, reward=mock_get_reward.return_value)

  @parameterized.parameters(*suite.ALL_TASKS)
  def test_task_supports_environment_kwargs(self, domain, task):
    env = suite.load(domain, task,
                     environment_kwargs=dict(flat_observation=True))
    # Check that the kwargs are actually passed through to the environment.
    self.assertSetEqual(set(env.observation_spec()),
                        {control.FLAT_OBSERVATION_KEY})

  @parameterized.parameters(*suite.ALL_TASKS)
  def test_observation_arrays_dont_share_memory(self, domain, task):
    env = suite.load(domain, task)
    first_timestep = env.reset()
    action = np.zeros(env.action_spec().shape)
    second_timestep = env.step(action)
    for name, first_array in six.iteritems(first_timestep.observation):
      second_array = second_timestep.observation[name]
      self.assertFalse(
          np.may_share_memory(first_array, second_array),
          msg='Consecutive observations of {!r} may share memory.'.format(name))

  @parameterized.parameters(*suite.ALL_TASKS)
  def test_observations_dont_contain_constant_elements(self, domain, task):
    env = suite.load(domain, task)
    trajectory = make_trajectory(domain=domain, task=task, seed=0,
                                 num_episodes=2, max_steps_per_episode=1000)
    observations = {name: [] for name in env.observation_spec()}
    for time_step in trajectory:
      for name, array in six.iteritems(time_step.observation):
        observations[name].append(array)

    failures = []

    for name, array_list in six.iteritems(observations):
      # Sampling random uniform actions generally isn't sufficient to trigger
      # these touch sensors.
      if (domain in ('manipulator', 'stacker') and name == 'touch' or
          domain == 'quadruped' and name == 'force_torque'):
        continue
      stacked_arrays = np.array(array_list)
      is_constant = np.all(stacked_arrays == stacked_arrays[0], axis=0)
      has_constant_elements = (
          is_constant if np.isscalar(is_constant) else np.any(is_constant))
      if has_constant_elements:
        failures.append((name, is_constant))

    self.assertEmpty(
        failures,
        msg='The following observation(s) contain constant elements:\n{}'
        .format('\n'.join(':\t'.join([name, str(is_constant)])
                          for (name, is_constant) in failures)))

  @parameterized.parameters(*suite.ALL_TASKS)
  def test_initial_state_is_randomized(self, domain, task):
    env = suite.load(domain, task, task_kwargs={'random': 42})
    obs1 = env.reset().observation
    obs2 = env.reset().observation
    self.assertFalse(
        all(np.all(obs1[k] == obs2[k]) for k in obs1),
        'Two consecutive initial states have identical observations.\n'
        'First: {}\nSecond: {}'.format(obs1, obs2))

if __name__ == '__main__':
  absltest.main()