sac_ae_if/local_dm_control_suite/lqr.py

# Copyright 2017 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Procedurally generated LQR domain."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import os

from dm_control import mujoco
from dm_control.rl import control
from local_dm_control_suite import base
from local_dm_control_suite import common
from dm_control.utils import containers
from dm_control.utils import xml_tools
from lxml import etree
import numpy as np
from six.moves import range

from dm_control.utils import io as resources

_DEFAULT_TIME_LIMIT = float('inf')
_CONTROL_COST_COEF = 0.1
SUITE = containers.TaggedTasks()


def get_model_and_assets(n_bodies, n_actuators, random):
  """Returns the model description as an XML string and a dict of assets.

  Args:
    n_bodies: An int, number of bodies of the LQR.
    n_actuators: An int, number of actuated bodies of the LQR. `n_actuators`
      should be less or equal than `n_bodies`.
    random: A `numpy.random.RandomState` instance.

  Returns:
    A tuple `(model_xml_string, assets)`, where `assets` is a dict consisting of
    `{filename: contents_string}` pairs.
  """
  return _make_model(n_bodies, n_actuators, random), common.ASSETS


@SUITE.add()
def lqr_2_1(time_limit=_DEFAULT_TIME_LIMIT, random=None,
            environment_kwargs=None):
  """Returns an LQR environment with 2 bodies of which the first is actuated."""
  return _make_lqr(n_bodies=2,
                   n_actuators=1,
                   control_cost_coef=_CONTROL_COST_COEF,
                   time_limit=time_limit,
                   random=random,
                   environment_kwargs=environment_kwargs)


@SUITE.add()
def lqr_6_2(time_limit=_DEFAULT_TIME_LIMIT, random=None,
            environment_kwargs=None):
  """Returns an LQR environment with 6 bodies of which first 2 are actuated."""
  return _make_lqr(n_bodies=6,
                   n_actuators=2,
                   control_cost_coef=_CONTROL_COST_COEF,
                   time_limit=time_limit,
                   random=random,
                   environment_kwargs=environment_kwargs)


def _make_lqr(n_bodies, n_actuators, control_cost_coef, time_limit, random,
              environment_kwargs):
  """Returns a LQR environment.

  Args:
    n_bodies: An int, number of bodies of the LQR.
    n_actuators: An int, number of actuated bodies of the LQR. `n_actuators`
      should be less or equal than `n_bodies`.
    control_cost_coef: A number, the coefficient of the control cost.
    time_limit: An int, maximum time for each episode in seconds.
    random: Either an existing `numpy.random.RandomState` instance, an
      integer seed for creating a new `RandomState`, or None to select a seed
      automatically.
    environment_kwargs: A `dict` specifying keyword arguments for the
      environment, or None.

  Returns:
    A LQR environment with `n_bodies` bodies of which first `n_actuators` are
    actuated.
  """

  if not isinstance(random, np.random.RandomState):
    random = np.random.RandomState(random)

  model_string, assets = get_model_and_assets(n_bodies, n_actuators,
                                              random=random)
  physics = Physics.from_xml_string(model_string, assets=assets)
  task = LQRLevel(control_cost_coef, random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(physics, task, time_limit=time_limit,
                             **environment_kwargs)


def _make_body(body_id, stiffness_range, damping_range, random):
  """Returns an `etree.Element` defining a body.

  Args:
    body_id: Id of the created body.
    stiffness_range: A tuple of (stiffness_lower_bound, stiffness_uppder_bound).
      The stiffness of the joint is drawn uniformly from this range.
    damping_range: A tuple of (damping_lower_bound, damping_upper_bound). The
      damping of the joint is drawn uniformly from this range.
    random: A `numpy.random.RandomState` instance.

  Returns:
   A new instance of `etree.Element`. A body element with two children: joint
   and geom.
  """
  body_name = 'body_{}'.format(body_id)
  joint_name = 'joint_{}'.format(body_id)
  geom_name = 'geom_{}'.format(body_id)

  body = etree.Element('body', name=body_name)
  body.set('pos', '.25 0 0')
  joint = etree.SubElement(body, 'joint', name=joint_name)
  body.append(etree.Element('geom', name=geom_name))
  joint.set('stiffness',
            str(random.uniform(stiffness_range[0], stiffness_range[1])))
  joint.set('damping',
            str(random.uniform(damping_range[0], damping_range[1])))
  return body


def _make_model(n_bodies,
                n_actuators,
                random,
                stiffness_range=(15, 25),
                damping_range=(0, 0)):
  """Returns an MJCF XML string defining a model of springs and dampers.

  Args:
    n_bodies: An integer, the number of bodies (DoFs) in the system.
    n_actuators: An integer, the number of actuated bodies.
    random: A `numpy.random.RandomState` instance.
    stiffness_range: A tuple containing minimum and maximum stiffness. Each
      joint's stiffness is sampled uniformly from this interval.
    damping_range: A tuple containing minimum and maximum damping. Each joint's
      damping is sampled uniformly from this interval.

  Returns:
    An MJCF string describing the linear system.

  Raises:
    ValueError: If the number of bodies or actuators is erronous.
  """
  if n_bodies < 1 or n_actuators < 1:
    raise ValueError('At least 1 body and 1 actuator required.')
  if n_actuators > n_bodies:
    raise ValueError('At most 1 actuator per body.')

  file_path = os.path.join(os.path.dirname(__file__), 'lqr.xml')
  with resources.GetResourceAsFile(file_path) as xml_file:
    mjcf = xml_tools.parse(xml_file)
  parent = mjcf.find('./worldbody')
  actuator = etree.SubElement(mjcf.getroot(), 'actuator')
  tendon = etree.SubElement(mjcf.getroot(), 'tendon')

  for body in range(n_bodies):
    # Inserting body.
    child = _make_body(body, stiffness_range, damping_range, random)
    site_name = 'site_{}'.format(body)
    child.append(etree.Element('site', name=site_name))

    if body == 0:
      child.set('pos', '.25 0 .1')
    # Add actuators to the first n_actuators bodies.
    if body < n_actuators:
      # Adding actuator.
      joint_name = 'joint_{}'.format(body)
      motor_name = 'motor_{}'.format(body)
      child.find('joint').set('name', joint_name)
      actuator.append(etree.Element('motor', name=motor_name, joint=joint_name))

    # Add a tendon between consecutive bodies (for visualisation purposes only).
    if body < n_bodies - 1:
      child_site_name = 'site_{}'.format(body + 1)
      tendon_name = 'tendon_{}'.format(body)
      spatial = etree.SubElement(tendon, 'spatial', name=tendon_name)
      spatial.append(etree.Element('site', site=site_name))
      spatial.append(etree.Element('site', site=child_site_name))
    parent.append(child)
    parent = child

  return etree.tostring(mjcf, pretty_print=True)


class Physics(mujoco.Physics):
  """Physics simulation with additional features for the LQR domain."""

  def state_norm(self):
    """Returns the norm of the physics state."""
    return np.linalg.norm(self.state())


class LQRLevel(base.Task):
  """A Linear Quadratic Regulator `Task`."""

  _TERMINAL_TOL = 1e-6

  def __init__(self, control_cost_coef, random=None):
    """Initializes an LQR level with cost = sum(states^2) + c*sum(controls^2).

    Args:
      control_cost_coef: The coefficient of the control cost.
      random: Optional, either a `numpy.random.RandomState` instance, an
        integer seed for creating a new `RandomState`, or None to select a seed
        automatically (default).

    Raises:
      ValueError: If the control cost coefficient is not positive.
    """
    if control_cost_coef <= 0:
      raise ValueError('control_cost_coef must be positive.')

    self._control_cost_coef = control_cost_coef
    super(LQRLevel, self).__init__(random=random)

  @property
  def control_cost_coef(self):
    return self._control_cost_coef

  def initialize_episode(self, physics):
    """Random state sampled from a unit sphere."""
    ndof = physics.model.nq
    unit = self.random.randn(ndof)
    physics.data.qpos[:] = np.sqrt(2) * unit / np.linalg.norm(unit)
    super(LQRLevel, self).initialize_episode(physics)

  def get_observation(self, physics):
    """Returns an observation of the state."""
    obs = collections.OrderedDict()
    obs['position'] = physics.position()
    obs['velocity'] = physics.velocity()
    return obs

  def get_reward(self, physics):
    """Returns a quadratic state and control reward."""
    position = physics.position()
    state_cost = 0.5 * np.dot(position, position)
    control_signal = physics.control()
    control_l2_norm = 0.5 * np.dot(control_signal, control_signal)
    return 1 - (state_cost + control_l2_norm * self._control_cost_coef)

  def get_evaluation(self, physics):
    """Returns a sparse evaluation reward that is not used for learning."""
    return float(physics.state_norm() <= 0.01)

  def get_termination(self, physics):
    """Terminates when the state norm is smaller than epsilon."""
    if physics.state_norm() < self._TERMINAL_TOL:
      return 0.0