Adding Denoised Predictive Imagination

2023-03-23 15:05:28 +01:00 · 2023-03-23 15:05:28 +01:00 · abaca2bea9
commit abaca2bea9
parent 69d1528077
72 changed files with 9065 additions and 0 deletions
--- a/DPI/Image/Image9.png
+++ b/DPI/Image/Image9.png
--- a/DPI/conda_env.yml
+++ b/DPI/conda_env.yml
@ -0,0 +1,25 @@
+name: dpi
+channels:
+  - defaults
+dependencies:
+  - python=3.7
+  - pytorch
+  - torchvision
+  - cudatoolkit=9.2
+  - absl-py
+  - pyparsing
+  - pip:
+    - termcolor
+    - git+git://github.com/deepmind/dm_control.git
+    - git+git://github.com/1nadequacy/dmc2gym.git
+    - opencv-python
+    - pillow==6.1
+    - scikit-image
+    - scikit-video
+    - tb-nightly
+    - tqdm
+    - imageio
+    - imageio-ffmpeg
+    - pygame
+    - networkx
+    - dotmap
--- a/DPI/dmc2gym/init.py
+++ b/DPI/dmc2gym/init.py
@ -0,0 +1,52 @@
+import gym
+from gym.envs.registration import register
+
+
+def make(
+    domain_name,
+    task_name,
+    resource_files,
+    img_source,
+    total_frames,
+    seed=1,
+    visualize_reward=True,
+    from_pixels=False,
+    height=84,
+    width=84,
+    camera_id=0,
+    frame_skip=1,
+    episode_length=1000,
+    environment_kwargs=None
+):
+    env_id = 'dmc_%s_%s_%s-v1' % (domain_name, task_name, seed)
+
+    if from_pixels:
+        assert not visualize_reward, 'cannot use visualize reward when learning from pixels'
+
+    # shorten episode length
+    max_episode_steps = (episode_length + frame_skip - 1) // frame_skip
+    
+    if not env_id in gym.envs.registry.env_specs:
+        register(
+            id=env_id,
+            entry_point='dmc2gym.wrappers:DMCWrapper',
+            kwargs={
+                'domain_name': domain_name,
+                'task_name': task_name,
+                'resource_files': resource_files,
+                'img_source': img_source,
+                'total_frames': total_frames,
+                'task_kwargs': {
+                    'random': seed
+                },
+                'environment_kwargs': environment_kwargs,
+                'visualize_reward': visualize_reward,
+                'from_pixels': from_pixels,
+                'height': height,
+                'width': width,
+                'camera_id': camera_id,
+                'frame_skip': frame_skip,
+            },
+            max_episode_steps=max_episode_steps
+        )
+    return gym.make(env_id)
--- a/DPI/dmc2gym/natural_imgsource.py
+++ b/DPI/dmc2gym/natural_imgsource.py
@ -0,0 +1,183 @@
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+import numpy as np
+import cv2
+import skvideo.io
+import random
+import tqdm
+
+class BackgroundMatting(object):
+    """
+    Produce a mask by masking the given color. This is a simple strategy
+    but effective for many games.
+    """
+    def __init__(self, color):
+        """
+        Args:
+            color: a (r, g, b) tuple or single value for grayscale
+        """
+        self._color = color
+
+    def get_mask(self, img):
+        return img == self._color
+
+
+class ImageSource(object):
+    """
+    Source of natural images to be added to a simulated environment.
+    """
+    def get_image(self):
+        """
+        Returns:
+            an RGB image of [h, w, 3] with a fixed shape.
+        """
+        pass
+
+    def reset(self):
+        """ Called when an episode ends. """
+        pass
+
+
+class FixedColorSource(ImageSource):
+    def __init__(self, shape, color):
+        """
+        Args:
+            shape: [h, w]
+            color: a 3-tuple
+        """
+        self.arr = np.zeros((shape[0], shape[1], 3))
+        self.arr[:, :] = color
+
+    def get_image(self):
+        return self.arr
+
+
+class RandomColorSource(ImageSource):
+    def __init__(self, shape):
+        """
+        Args:
+            shape: [h, w]
+        """
+        self.shape = shape
+        self.arr = None
+        self.reset()
+
+    def reset(self):
+        self._color = np.random.randint(0, 256, size=(3,))
+        self.arr = np.zeros((self.shape[0], self.shape[1], 3))
+        self.arr[:, :] = self._color
+
+    def get_image(self):
+        return self.arr
+
+
+class NoiseSource(ImageSource):
+    def __init__(self, shape, strength=255):
+        """
+        Args:
+            shape: [h, w]
+            strength (int): the strength of noise, in range [0, 255]
+        """
+        self.shape = shape
+        self.strength = strength
+
+    def get_image(self):
+        return np.random.randn(self.shape[0], self.shape[1], 3) * self.strength
+
+
+class RandomImageSource(ImageSource):
+    def __init__(self, shape, filelist, total_frames=None, grayscale=False):
+        """
+        Args:
+            shape: [h, w]
+            filelist: a list of image files
+        """
+        self.grayscale = grayscale
+        self.total_frames = total_frames
+        self.shape = shape
+        self.filelist = filelist
+        self.build_arr()
+        self.current_idx = 0
+        self.reset()
+
+    def build_arr(self):
+        self.total_frames = self.total_frames if self.total_frames else len(self.filelist)
+        self.arr = np.zeros((self.total_frames, self.shape[0], self.shape[1]) + ((3,) if not self.grayscale else (1,)))
+        for i in range(self.total_frames):
+            # if i % len(self.filelist) == 0: random.shuffle(self.filelist)
+            fname = self.filelist[i % len(self.filelist)]
+            if self.grayscale: im = cv2.imread(fname, cv2.IMREAD_GRAYSCALE)[..., None]
+            else:              im = cv2.imread(fname, cv2.IMREAD_COLOR)
+            self.arr[i] = cv2.resize(im, (self.shape[1], self.shape[0])) ## THIS IS NOT A BUG! cv2 uses (width, height)
+
+    def reset(self):
+        self._loc = np.random.randint(0, self.total_frames)
+
+    def get_image(self):
+        return self.arr[self._loc]
+
+
+class RandomVideoSource(ImageSource):
+    def __init__(self, shape, filelist, total_frames=None, grayscale=False):
+        """
+        Args:
+            shape: [h, w]
+            filelist: a list of video files
+        """
+        self.grayscale = grayscale
+        self.total_frames = total_frames
+        self.shape = shape
+        self.filelist = filelist
+        self.build_arr()
+        self.current_idx = 0
+        self.reset()
+
+    def build_arr(self):
+        if not self.total_frames:
+            self.total_frames = 0
+            self.arr = None
+            random.shuffle(self.filelist)
+            for fname in tqdm.tqdm(self.filelist, desc="Loading videos for natural", position=0):
+                if self.grayscale: frames = skvideo.io.vread(fname, outputdict={"-pix_fmt": "gray"})
+                else:              frames = skvideo.io.vread(fname)
+                local_arr = np.zeros((frames.shape[0], self.shape[0], self.shape[1]) + ((3,) if not self.grayscale else (1,)))
+                for i in tqdm.tqdm(range(frames.shape[0]), desc="video frames", position=1):
+                    local_arr[i] = cv2.resize(frames[i], (self.shape[1], self.shape[0])) ## THIS IS NOT A BUG! cv2 uses (width, height)
+                if self.arr is None:
+                    self.arr = local_arr
+                else:
+                    self.arr = np.concatenate([self.arr, local_arr], 0)
+                self.total_frames += local_arr.shape[0]
+        else:
+            self.arr = np.zeros((self.total_frames, self.shape[0], self.shape[1]) + ((3,) if not self.grayscale else (1,)))
+            total_frame_i = 0
+            file_i = 0
+            with tqdm.tqdm(total=self.total_frames, desc="Loading videos for natural") as pbar:
+                while total_frame_i < self.total_frames:
+                    if file_i % len(self.filelist) == 0: random.shuffle(self.filelist)
+                    file_i += 1
+                    fname = self.filelist[file_i % len(self.filelist)]
+                    if self.grayscale: frames = skvideo.io.vread(fname, outputdict={"-pix_fmt": "gray"})
+                    else:              frames = skvideo.io.vread(fname)
+                    for frame_i in range(frames.shape[0]):
+                        if total_frame_i >= self.total_frames: break
+                        if self.grayscale:
+                            self.arr[total_frame_i] = cv2.resize(frames[frame_i], (self.shape[1], self.shape[0]))[..., None] ## THIS IS NOT A BUG! cv2 uses (width, height)
+                        else:
+                            self.arr[total_frame_i] = cv2.resize(frames[frame_i], (self.shape[1], self.shape[0])) 
+                        pbar.update(1)
+                        total_frame_i += 1
+
+
+    def reset(self):
+        self._loc = np.random.randint(0, self.total_frames)
+
+    def get_image(self):
+        img = self.arr[self._loc % self.total_frames]
+        self._loc += 1
+        return img
--- a/DPI/dmc2gym/wrappers.py
+++ b/DPI/dmc2gym/wrappers.py
@ -0,0 +1,198 @@
+from gym import core, spaces
+import glob
+import os
+import local_dm_control_suite as suite
+from dm_env import specs
+import numpy as np
+import skimage.io
+
+from dmc2gym import natural_imgsource
+
+
+def _spec_to_box(spec):
+    def extract_min_max(s):
+        assert s.dtype == np.float64 or s.dtype == np.float32
+        dim = np.int(np.prod(s.shape))
+        if type(s) == specs.Array:
+            bound = np.inf * np.ones(dim, dtype=np.float32)
+            return -bound, bound
+        elif type(s) == specs.BoundedArray:
+            zeros = np.zeros(dim, dtype=np.float32)
+            return s.minimum + zeros, s.maximum + zeros
+
+    mins, maxs = [], []
+    for s in spec:
+        mn, mx = extract_min_max(s)
+        mins.append(mn)
+        maxs.append(mx)
+    low = np.concatenate(mins, axis=0)
+    high = np.concatenate(maxs, axis=0)
+    assert low.shape == high.shape
+    return spaces.Box(low, high, dtype=np.float32)
+
+
+def _flatten_obs(obs):
+    obs_pieces = []
+    for v in obs.values():
+        flat = np.array([v]) if np.isscalar(v) else v.ravel()
+        obs_pieces.append(flat)
+    return np.concatenate(obs_pieces, axis=0)
+
+
+class DMCWrapper(core.Env):
+    def __init__(
+        self,
+        domain_name,
+        task_name,
+        resource_files,
+        img_source,
+        total_frames,
+        task_kwargs=None,
+        visualize_reward={},
+        from_pixels=False,
+        height=84,
+        width=84,
+        camera_id=0,
+        frame_skip=1,
+        environment_kwargs=None
+    ):
+        assert 'random' in task_kwargs, 'please specify a seed, for deterministic behaviour'
+        self._from_pixels = from_pixels
+        self._height = height
+        self._width = width
+        self._camera_id = camera_id
+        self._frame_skip = frame_skip
+        self._img_source = img_source
+
+        # create task
+        self._env = suite.load(
+            domain_name=domain_name,
+            task_name=task_name,
+            task_kwargs=task_kwargs,
+            visualize_reward=visualize_reward,
+            environment_kwargs=environment_kwargs
+        )
+
+        # true and normalized action spaces
+        self._true_action_space = _spec_to_box([self._env.action_spec()])
+        self._norm_action_space = spaces.Box(
+            low=-1.0,
+            high=1.0,
+            shape=self._true_action_space.shape,
+            dtype=np.float32
+        )
+
+        # create observation space
+        if from_pixels:
+            self._observation_space = spaces.Box(
+                low=0, high=255, shape=[3, height, width], dtype=np.uint8
+            )
+        else:
+            self._observation_space = _spec_to_box(
+                self._env.observation_spec().values()
+            )
+
+        self._internal_state_space = spaces.Box(
+            low=-np.inf,
+            high=np.inf,
+            shape=self._env.physics.get_state().shape,
+            dtype=np.float32
+        )
+
+        # background
+        if img_source is not None:
+            shape2d = (height, width)
+            if img_source == "color":
+                self._bg_source = natural_imgsource.RandomColorSource(shape2d)
+            elif img_source == "noise":
+                self._bg_source = natural_imgsource.NoiseSource(shape2d)
+            else:
+                files = glob.glob(os.path.expanduser(resource_files))
+                assert len(files), "Pattern {} does not match any files".format(
+                    resource_files
+                )
+                if img_source == "images":
+                    self._bg_source = natural_imgsource.RandomImageSource(shape2d, files, grayscale=True, total_frames=total_frames)
+                elif img_source == "video":
+                    self._bg_source = natural_imgsource.RandomVideoSource(shape2d, files, grayscale=True, total_frames=total_frames)
+                else:
+                    raise Exception("img_source %s not defined." % img_source)
+
+        # set seed
+        self.seed(seed=task_kwargs.get('random', 1))
+
+    def __getattr__(self, name):
+        return getattr(self._env, name)
+
+    def _get_obs(self, time_step):
+        if self._from_pixels:
+            obs = self.render(
+                height=self._height,
+                width=self._width,
+                camera_id=self._camera_id
+            )
+            if self._img_source is not None:
+                mask = np.logical_and((obs[:, :, 2] > obs[:, :, 1]), (obs[:, :, 2] > obs[:, :, 0]))  # hardcoded for dmc
+                bg = self._bg_source.get_image()
+                obs[mask] = bg[mask]
+            obs = obs.transpose(2, 0, 1).copy()
+        else:
+            obs = _flatten_obs(time_step.observation)
+        return obs
+
+    def _convert_action(self, action):
+        action = action.astype(np.float64)
+        true_delta = self._true_action_space.high - self._true_action_space.low
+        norm_delta = self._norm_action_space.high - self._norm_action_space.low
+        action = (action - self._norm_action_space.low) / norm_delta
+        action = action * true_delta + self._true_action_space.low
+        action = action.astype(np.float32)
+        return action
+
+    @property
+    def observation_space(self):
+        return self._observation_space
+
+    @property
+    def internal_state_space(self):
+        return self._internal_state_space
+
+    @property
+    def action_space(self):
+        return self._norm_action_space
+
+    def seed(self, seed):
+        self._true_action_space.seed(seed)
+        self._norm_action_space.seed(seed)
+        self._observation_space.seed(seed)
+
+    def step(self, action):
+        assert self._norm_action_space.contains(action)
+        action = self._convert_action(action)
+        assert self._true_action_space.contains(action)
+        reward = 0
+        extra = {'internal_state': self._env.physics.get_state().copy()}
+
+        for _ in range(self._frame_skip):
+            time_step = self._env.step(action)
+            reward += time_step.reward or 0
+            done = time_step.last()
+            if done:
+                break
+        obs = self._get_obs(time_step)
+        extra['discount'] = time_step.discount
+        return obs, reward, done, extra
+
+    def reset(self):
+        time_step = self._env.reset()
+        obs = self._get_obs(time_step)
+        return obs
+
+    def render(self, mode='rgb_array', height=None, width=None, camera_id=0):
+        assert mode == 'rgb_array', 'only support rgb_array mode, given %s' % mode
+        height = height or self._height
+        width = width or self._width
+        camera_id = camera_id or self._camera_id
+        return self._env.physics.render(
+            height=height, width=width, camera_id=camera_id
+        )
--- a/DPI/local_dm_control_suite/README.md
+++ b/DPI/local_dm_control_suite/README.md
@ -0,0 +1,56 @@
+# DeepMind Control Suite.
+
+This submodule contains the domains and tasks described in the
+[DeepMind Control Suite tech report](https://arxiv.org/abs/1801.00690).
+
+## Quickstart
+
+```python
+from dm_control import suite
+import numpy as np
+
+# Load one task:
+env = suite.load(domain_name="cartpole", task_name="swingup")
+
+# Iterate over a task set:
+for domain_name, task_name in suite.BENCHMARKING:
+  env = suite.load(domain_name, task_name)
+
+# Step through an episode and print out reward, discount and observation.
+action_spec = env.action_spec()
+time_step = env.reset()
+while not time_step.last():
+  action = np.random.uniform(action_spec.minimum,
+                             action_spec.maximum,
+                             size=action_spec.shape)
+  time_step = env.step(action)
+  print(time_step.reward, time_step.discount, time_step.observation)
+```
+
+## Illustration video
+
+Below is a video montage of solved Control Suite tasks, with reward
+visualisation enabled.
+
+[![Video montage](https://img.youtube.com/vi/rAai4QzcYbs/0.jpg)](https://www.youtube.com/watch?v=rAai4QzcYbs)
+
+
+### Quadruped domain [April 2019]
+
+Roughly based on the 'ant' model introduced by [Schulman et al. 2015](https://arxiv.org/abs/1506.02438). Main modifications to the body are:
+
+- 4 DoFs per leg, 1 constraining tendon.
+- 3 actuators per leg: 'yaw', 'lift', 'extend'.
+- Filtered position actuators with timescale of 100ms.
+- Sensors include an IMU, force/torque sensors, and rangefinders.
+
+Four tasks:
+
+- `walk` and `run`: self-right the body then move forward at a desired speed.
+- `escape`: escape a bowl-shaped random terrain (uses rangefinders).
+- `fetch`, go to a moving ball and bring it to a target.
+
+All behaviors in the video below were trained with [Abdolmaleki et al's
+MPO](https://arxiv.org/abs/1806.06920).
+
+[![Video montage](https://img.youtube.com/vi/RhRLjbb7pBE/0.jpg)](https://www.youtube.com/watch?v=RhRLjbb7pBE)
--- a/DPI/local_dm_control_suite/init.py
+++ b/DPI/local_dm_control_suite/init.py
@ -0,0 +1,151 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""A collection of MuJoCo-based Reinforcement Learning environments."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import inspect
+import itertools
+
+from dm_control.rl import control
+
+from local_dm_control_suite import acrobot
+from local_dm_control_suite import ball_in_cup
+from local_dm_control_suite import cartpole
+from local_dm_control_suite import cheetah
+from local_dm_control_suite import finger
+from local_dm_control_suite import fish
+from local_dm_control_suite import hopper
+from local_dm_control_suite import humanoid
+from local_dm_control_suite import humanoid_CMU
+from local_dm_control_suite import lqr
+from local_dm_control_suite import manipulator
+from local_dm_control_suite import pendulum
+from local_dm_control_suite import point_mass
+from local_dm_control_suite import quadruped
+from local_dm_control_suite import reacher
+from local_dm_control_suite import stacker
+from local_dm_control_suite import swimmer
+from local_dm_control_suite import walker
+
+# Find all domains imported.
+_DOMAINS = {name: module for name, module in locals().items()
+            if inspect.ismodule(module) and hasattr(module, 'SUITE')}
+
+
+def _get_tasks(tag):
+  """Returns a sequence of (domain name, task name) pairs for the given tag."""
+  result = []
+
+  for domain_name in sorted(_DOMAINS.keys()):
+    domain = _DOMAINS[domain_name]
+
+    if tag is None:
+      tasks_in_domain = domain.SUITE
+    else:
+      tasks_in_domain = domain.SUITE.tagged(tag)
+
+    for task_name in tasks_in_domain.keys():
+      result.append((domain_name, task_name))
+
+  return tuple(result)
+
+
+def _get_tasks_by_domain(tasks):
+  """Returns a dict mapping from task name to a tuple of domain names."""
+  result = collections.defaultdict(list)
+
+  for domain_name, task_name in tasks:
+    result[domain_name].append(task_name)
+
+  return {k: tuple(v) for k, v in result.items()}
+
+
+# A sequence containing all (domain name, task name) pairs.
+ALL_TASKS = _get_tasks(tag=None)
+
+# Subsets of ALL_TASKS, generated via the tag mechanism.
+BENCHMARKING = _get_tasks('benchmarking')
+EASY = _get_tasks('easy')
+HARD = _get_tasks('hard')
+EXTRA = tuple(sorted(set(ALL_TASKS) - set(BENCHMARKING)))
+
+# A mapping from each domain name to a sequence of its task names.
+TASKS_BY_DOMAIN = _get_tasks_by_domain(ALL_TASKS)
+
+
+def load(domain_name, task_name, task_kwargs=None, environment_kwargs=None,
+         visualize_reward=False):
+  """Returns an environment from a domain name, task name and optional settings.
+
+  ```python
+  env = suite.load('cartpole', 'balance')
+  ```
+
+  Args:
+    domain_name: A string containing the name of a domain.
+    task_name: A string containing the name of a task.
+    task_kwargs: Optional `dict` of keyword arguments for the task.
+    environment_kwargs: Optional `dict` specifying keyword arguments for the
+      environment.
+    visualize_reward: Optional `bool`. If `True`, object colours in rendered
+      frames are set to indicate the reward at each step. Default `False`.
+
+  Returns:
+    The requested environment.
+  """
+  return build_environment(domain_name, task_name, task_kwargs,
+                           environment_kwargs, visualize_reward)
+
+
+def build_environment(domain_name, task_name, task_kwargs=None,
+                      environment_kwargs=None, visualize_reward=False):
+  """Returns an environment from the suite given a domain name and a task name.
+
+  Args:
+    domain_name: A string containing the name of a domain.
+    task_name: A string containing the name of a task.
+    task_kwargs: Optional `dict` specifying keyword arguments for the task.
+    environment_kwargs: Optional `dict` specifying keyword arguments for the
+      environment.
+    visualize_reward: Optional `bool`. If `True`, object colours in rendered
+      frames are set to indicate the reward at each step. Default `False`.
+
+  Raises:
+    ValueError: If the domain or task doesn't exist.
+
+  Returns:
+    An instance of the requested environment.
+  """
+  if domain_name not in _DOMAINS:
+    raise ValueError('Domain {!r} does not exist.'.format(domain_name))
+
+  domain = _DOMAINS[domain_name]
+
+  if task_name not in domain.SUITE:
+    raise ValueError('Level {!r} does not exist in domain {!r}.'.format(
+        task_name, domain_name))
+
+  task_kwargs = task_kwargs or {}
+  if environment_kwargs is not None:
+    task_kwargs = task_kwargs.copy()
+    task_kwargs['environment_kwargs'] = environment_kwargs
+  env = domain.SUITE[task_name](**task_kwargs)
+  env.task.visualize_reward = visualize_reward
+  return env
--- a/DPI/local_dm_control_suite/acrobot.py
+++ b/DPI/local_dm_control_suite/acrobot.py
@ -0,0 +1,127 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Acrobot domain."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+from dm_control import mujoco
+from dm_control.rl import control
+from local_dm_control_suite import base
+from local_dm_control_suite import common
+from dm_control.utils import containers
+from dm_control.utils import rewards
+import numpy as np
+
+_DEFAULT_TIME_LIMIT = 10
+SUITE = containers.TaggedTasks()
+
+
+def get_model_and_assets():
+  """Returns a tuple containing the model XML string and a dict of assets."""
+  return common.read_model('acrobot.xml'), common.ASSETS
+
+
+@SUITE.add('benchmarking')
+def swingup(time_limit=_DEFAULT_TIME_LIMIT, random=None,
+            environment_kwargs=None):
+  """Returns Acrobot balance task."""
+  physics = Physics.from_xml_string(*get_model_and_assets())
+  task = Balance(sparse=False, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, **environment_kwargs)
+
+
+@SUITE.add('benchmarking')
+def swingup_sparse(time_limit=_DEFAULT_TIME_LIMIT, random=None,
+                   environment_kwargs=None):
+  """Returns Acrobot sparse balance."""
+  physics = Physics.from_xml_string(*get_model_and_assets())
+  task = Balance(sparse=True, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, **environment_kwargs)
+
+
+class Physics(mujoco.Physics):
+  """Physics simulation with additional features for the Acrobot domain."""
+
+  def horizontal(self):
+    """Returns horizontal (x) component of body frame z-axes."""
+    return self.named.data.xmat[['upper_arm', 'lower_arm'], 'xz']
+
+  def vertical(self):
+    """Returns vertical (z) component of body frame z-axes."""
+    return self.named.data.xmat[['upper_arm', 'lower_arm'], 'zz']
+
+  def to_target(self):
+    """Returns the distance from the tip to the target."""
+    tip_to_target = (self.named.data.site_xpos['target'] -
+                     self.named.data.site_xpos['tip'])
+    return np.linalg.norm(tip_to_target)
+
+  def orientations(self):
+    """Returns the sines and cosines of the pole angles."""
+    return np.concatenate((self.horizontal(), self.vertical()))
+
+
+class Balance(base.Task):
+  """An Acrobot `Task` to swing up and balance the pole."""
+
+  def __init__(self, sparse, random=None):
+    """Initializes an instance of `Balance`.
+
+    Args:
+      sparse: A `bool` specifying whether to use a sparse (indicator) reward.
+      random: Optional, either a `numpy.random.RandomState` instance, an
+        integer seed for creating a new `RandomState`, or None to select a seed
+        automatically (default).
+    """
+    self._sparse = sparse
+    super(Balance, self).__init__(random=random)
+
+  def initialize_episode(self, physics):
+    """Sets the state of the environment at the start of each episode.
+
+    Shoulder and elbow are set to a random position between [-pi, pi).
+
+    Args:
+      physics: An instance of `Physics`.
+    """
+    physics.named.data.qpos[
+        ['shoulder', 'elbow']] = self.random.uniform(-np.pi, np.pi, 2)
+    super(Balance, self).initialize_episode(physics)
+
+  def get_observation(self, physics):
+    """Returns an observation of pole orientation and angular velocities."""
+    obs = collections.OrderedDict()
+    obs['orientations'] = physics.orientations()
+    obs['velocity'] = physics.velocity()
+    return obs
+
+  def _get_reward(self, physics, sparse):
+    target_radius = physics.named.model.site_size['target', 0]
+    return rewards.tolerance(physics.to_target(),
+                             bounds=(0, target_radius),
+                             margin=0 if sparse else 1)
+
+  def get_reward(self, physics):
+    """Returns a sparse or a smooth reward, as specified in the constructor."""
+    return self._get_reward(physics, sparse=self._sparse)
--- a/DPI/local_dm_control_suite/acrobot.xml
+++ b/DPI/local_dm_control_suite/acrobot.xml
@ -0,0 +1,43 @@
+<!--
+Based on Coulomb's [1] rather than Spong's [2] model.
+[1] Coulom, Rémi. Reinforcement learning using neural networks, with applications to motor control.
+    Diss. Institut National Polytechnique de Grenoble-INPG, 2002.
+[2] Spong, Mark W. "The swing up control problem for the acrobot."
+    IEEE control systems 15, no. 1 (1995): 49-55.
+-->
+<mujoco model="acrobot">
+  <include file="./common/visual.xml"/>
+  <include file="./common/skybox.xml"/>
+  <include file="./common/materials.xml"/>
+
+  <default>
+    <joint damping=".05"/>
+    <geom type="capsule" mass="1"/>
+  </default>
+
+  <option timestep="0.01" integrator="RK4">
+    <flag constraint="disable" energy="enable"/>
+  </option>
+
+  <worldbody>
+    <light name="light" pos="0 0 6"/>
+    <geom name="floor" size="3 3 .2" type="plane" material="grid"/>
+    <site name="target" type="sphere" pos="0 0 4" size="0.2" material="target" group="3"/>
+    <camera name="fixed" pos="0 -6 2" zaxis="0 -1 0"/>
+    <camera name="lookat" mode="targetbodycom" target="upper_arm" pos="0 -2 3"/>
+    <body name="upper_arm" pos="0 0 2">
+      <joint name="shoulder" type="hinge" axis="0 1 0"/>
+      <geom name="upper_arm_decoration" material="decoration" type="cylinder" fromto="0 -.06 0 0 .06 0" size="0.051" mass="0"/>
+      <geom name="upper_arm" fromto="0 0 0 0 0 1" size="0.05" material="self"/>
+      <body name="lower_arm" pos="0 0 1">
+        <joint name="elbow" type="hinge" axis="0 1 0"/>
+        <geom name="lower_arm" fromto="0 0 0 0 0 1" size="0.049" material="self"/>
+        <site name="tip" pos="0 0 1" size="0.01"/>
+      </body>
+    </body>
+  </worldbody>
+
+   <actuator>
+    <motor name="elbow" joint="elbow" gear="2" ctrllimited="true" ctrlrange="-1 1"/>
+  </actuator>
+</mujoco>
--- a/DPI/local_dm_control_suite/ball_in_cup.py
+++ b/DPI/local_dm_control_suite/ball_in_cup.py
@ -0,0 +1,100 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Ball-in-Cup Domain."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+from dm_control import mujoco
+from dm_control.rl import control
+from local_dm_control_suite import base
+from local_dm_control_suite import common
+from dm_control.utils import containers
+
+_DEFAULT_TIME_LIMIT = 20  # (seconds)
+_CONTROL_TIMESTEP = .02   # (seconds)
+
+
+SUITE = containers.TaggedTasks()
+
+
+def get_model_and_assets():
+  """Returns a tuple containing the model XML string and a dict of assets."""
+  return common.read_model('ball_in_cup.xml'), common.ASSETS
+
+
+@SUITE.add('benchmarking', 'easy')
+def catch(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Returns the Ball-in-Cup task."""
+  physics = Physics.from_xml_string(*get_model_and_assets())
+  task = BallInCup(random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
+      **environment_kwargs)
+
+
+class Physics(mujoco.Physics):
+  """Physics with additional features for the Ball-in-Cup domain."""
+
+  def ball_to_target(self):
+    """Returns the vector from the ball to the target."""
+    target = self.named.data.site_xpos['target', ['x', 'z']]
+    ball = self.named.data.xpos['ball', ['x', 'z']]
+    return target - ball
+
+  def in_target(self):
+    """Returns 1 if the ball is in the target, 0 otherwise."""
+    ball_to_target = abs(self.ball_to_target())
+    target_size = self.named.model.site_size['target', [0, 2]]
+    ball_size = self.named.model.geom_size['ball', 0]
+    return float(all(ball_to_target < target_size - ball_size))
+
+
+class BallInCup(base.Task):
+  """The Ball-in-Cup task. Put the ball in the cup."""
+
+  def initialize_episode(self, physics):
+    """Sets the state of the environment at the start of each episode.
+
+    Args:
+      physics: An instance of `Physics`.
+
+    """
+    # Find a collision-free random initial position of the ball.
+    penetrating = True
+    while penetrating:
+      # Assign a random ball position.
+      physics.named.data.qpos['ball_x'] = self.random.uniform(-.2, .2)
+      physics.named.data.qpos['ball_z'] = self.random.uniform(.2, .5)
+      # Check for collisions.
+      physics.after_reset()
+      penetrating = physics.data.ncon > 0
+    super(BallInCup, self).initialize_episode(physics)
+
+  def get_observation(self, physics):
+    """Returns an observation of the state."""
+    obs = collections.OrderedDict()
+    obs['position'] = physics.position()
+    obs['velocity'] = physics.velocity()
+    return obs
+
+  def get_reward(self, physics):
+    """Returns a sparse reward."""
+    return physics.in_target()
--- a/DPI/local_dm_control_suite/ball_in_cup.xml
+++ b/DPI/local_dm_control_suite/ball_in_cup.xml
@ -0,0 +1,54 @@
+<mujoco model="ball in cup">
+
+  <include file="./common/visual.xml"/>
+  <include file="./common/skybox.xml"/>
+  <include file="./common/materials.xml"/>
+
+  <default>
+    <motor ctrllimited="true" ctrlrange="-1 1" gear="5"/>
+    <default class="cup">
+      <joint type="slide" damping="3" stiffness="20"/>
+      <geom type="capsule" size=".008" material="self"/>
+    </default>
+  </default>
+
+  <worldbody>
+    <light name="light" directional="true" diffuse=".6 .6 .6" pos="0 0 2" specular=".3 .3 .3"/>
+    <geom name="ground" type="plane" pos="0 0 0" size=".6 .2 10" material="grid"/>
+    <camera name="cam0" pos="0 -1 .8" xyaxes="1 0 0 0 1 2"/>
+    <camera name="cam1" pos="0 -1 .4" xyaxes="1 0 0 0 0 1" />
+
+    <body name="cup" pos="0 0 .6" childclass="cup">
+      <joint name="cup_x" axis="1 0 0"/>
+      <joint name="cup_z" axis="0 0 1"/>
+      <geom name="cup_part_0" fromto="-.05 0 0 -.05 0 -.075" />
+      <geom name="cup_part_1" fromto="-.05 0 -.075 -.025 0 -.1" />
+      <geom name="cup_part_2" fromto="-.025 0 -.1 .025 0 -.1" />
+      <geom name="cup_part_3" fromto=".025 0 -.1 .05 0 -.075" />
+      <geom name="cup_part_4" fromto=".05 0 -.075 .05 0 0" />
+      <site name="cup" pos="0 0 -.108" size=".005"/>
+      <site name="target" type="box" pos="0 0 -.05" size=".05 .006 .05" group="4"/>
+    </body>
+
+    <body name="ball" pos="0 0 .2">
+      <joint name="ball_x" type="slide" axis="1 0 0"/>
+      <joint name="ball_z" type="slide" axis="0 0 1"/>
+      <geom name="ball" type="sphere" size=".025" material="effector"/>
+      <site name="ball" size=".005"/>
+    </body>
+  </worldbody>
+
+  <actuator>
+    <motor name="x" joint="cup_x"/>
+    <motor name="z" joint="cup_z"/>
+  </actuator>
+
+  <tendon>
+    <spatial name="string" limited="true" range="0 0.3" width="0.003">
+      <site site="ball"/>
+      <site site="cup"/>
+    </spatial>
+  </tendon>
+
+</mujoco>
+
--- a/DPI/local_dm_control_suite/base.py
+++ b/DPI/local_dm_control_suite/base.py
@ -0,0 +1,112 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Base class for tasks in the Control Suite."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from dm_control import mujoco
+from dm_control.rl import control
+
+import numpy as np
+
+
+class Task(control.Task):
+  """Base class for tasks in the Control Suite.
+
+  Actions are mapped directly to the states of MuJoCo actuators: each element of
+  the action array is used to set the control input for a single actuator. The
+  ordering of the actuators is the same as in the corresponding MJCF XML file.
+
+  Attributes:
+    random: A `numpy.random.RandomState` instance. This should be used to
+      generate all random variables associated with the task, such as random
+      starting states, observation noise* etc.
+
+  *If sensor noise is enabled in the MuJoCo model then this will be generated
+  using MuJoCo's internal RNG, which has its own independent state.
+  """
+
+  def __init__(self, random=None):
+    """Initializes a new continuous control task.
+
+    Args:
+      random: Optional, either a `numpy.random.RandomState` instance, an integer
+        seed for creating a new `RandomState`, or None to select a seed
+        automatically (default).
+    """
+    if not isinstance(random, np.random.RandomState):
+      random = np.random.RandomState(random)
+    self._random = random
+    self._visualize_reward = False
+
+  @property
+  def random(self):
+    """Task-specific `numpy.random.RandomState` instance."""
+    return self._random
+
+  def action_spec(self, physics):
+    """Returns a `BoundedArraySpec` matching the `physics` actuators."""
+    return mujoco.action_spec(physics)
+
+  def initialize_episode(self, physics):
+    """Resets geom colors to their defaults after starting a new episode.
+
+    Subclasses of `base.Task` must delegate to this method after performing
+    their own initialization.
+
+    Args:
+      physics: An instance of `mujoco.Physics`.
+    """
+    self.after_step(physics)
+
+  def before_step(self, action, physics):
+    """Sets the control signal for the actuators to values in `action`."""
+    # Support legacy internal code.
+    action = getattr(action, "continuous_actions", action)
+    physics.set_control(action)
+
+  def after_step(self, physics):
+    """Modifies colors according to the reward."""
+    if self._visualize_reward:
+      reward = np.clip(self.get_reward(physics), 0.0, 1.0)
+      _set_reward_colors(physics, reward)
+
+  @property
+  def visualize_reward(self):
+    return self._visualize_reward
+
+  @visualize_reward.setter
+  def visualize_reward(self, value):
+    if not isinstance(value, bool):
+      raise ValueError("Expected a boolean, got {}.".format(type(value)))
+    self._visualize_reward = value
+
+
+_MATERIALS = ["self", "effector", "target"]
+_DEFAULT = [name + "_default" for name in _MATERIALS]
+_HIGHLIGHT = [name + "_highlight" for name in _MATERIALS]
+
+
+def _set_reward_colors(physics, reward):
+  """Sets the highlight, effector and target colors according to the reward."""
+  assert 0.0 <= reward <= 1.0
+  colors = physics.named.model.mat_rgba
+  default = colors[_DEFAULT]
+  highlight = colors[_HIGHLIGHT]
+  blend_coef = reward ** 4  # Better color distinction near high rewards.
+  colors[_MATERIALS] = blend_coef * highlight + (1.0 - blend_coef) * default
--- a/DPI/local_dm_control_suite/cartpole.py
+++ b/DPI/local_dm_control_suite/cartpole.py
@ -0,0 +1,230 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Cartpole domain."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+from dm_control import mujoco
+from dm_control.rl import control
+from local_dm_control_suite import base
+from local_dm_control_suite import common
+from dm_control.utils import containers
+from dm_control.utils import rewards
+from lxml import etree
+import numpy as np
+from six.moves import range
+
+
+_DEFAULT_TIME_LIMIT = 10
+SUITE = containers.TaggedTasks()
+
+
+def get_model_and_assets(num_poles=1):
+  """Returns a tuple containing the model XML string and a dict of assets."""
+  return _make_model(num_poles), common.ASSETS
+
+
+@SUITE.add('benchmarking')
+def balance(time_limit=_DEFAULT_TIME_LIMIT, random=None,
+            environment_kwargs=None):
+  """Returns the Cartpole Balance task."""
+  physics = Physics.from_xml_string(*get_model_and_assets())
+  task = Balance(swing_up=False, sparse=False, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, **environment_kwargs)
+
+
+@SUITE.add('benchmarking')
+def balance_sparse(time_limit=_DEFAULT_TIME_LIMIT, random=None,
+                   environment_kwargs=None):
+  """Returns the sparse reward variant of the Cartpole Balance task."""
+  physics = Physics.from_xml_string(*get_model_and_assets())
+  task = Balance(swing_up=False, sparse=True, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, **environment_kwargs)
+
+
+@SUITE.add('benchmarking')
+def swingup(time_limit=_DEFAULT_TIME_LIMIT, random=None,
+            environment_kwargs=None):
+  """Returns the Cartpole Swing-Up task."""
+  physics = Physics.from_xml_string(*get_model_and_assets())
+  task = Balance(swing_up=True, sparse=False, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, **environment_kwargs)
+
+
+@SUITE.add('benchmarking')
+def swingup_sparse(time_limit=_DEFAULT_TIME_LIMIT, random=None,
+                   environment_kwargs=None):
+  """Returns the sparse reward variant of teh Cartpole Swing-Up task."""
+  physics = Physics.from_xml_string(*get_model_and_assets())
+  task = Balance(swing_up=True, sparse=True, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, **environment_kwargs)
+
+
+@SUITE.add()
+def two_poles(time_limit=_DEFAULT_TIME_LIMIT, random=None,
+              environment_kwargs=None):
+  """Returns the Cartpole Balance task with two poles."""
+  physics = Physics.from_xml_string(*get_model_and_assets(num_poles=2))
+  task = Balance(swing_up=True, sparse=False, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, **environment_kwargs)
+
+
+@SUITE.add()
+def three_poles(time_limit=_DEFAULT_TIME_LIMIT, random=None, num_poles=3,
+                sparse=False, environment_kwargs=None):
+  """Returns the Cartpole Balance task with three or more poles."""
+  physics = Physics.from_xml_string(*get_model_and_assets(num_poles=num_poles))
+  task = Balance(swing_up=True, sparse=sparse, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, **environment_kwargs)
+
+
+def _make_model(n_poles):
+  """Generates an xml string defining a cart with `n_poles` bodies."""
+  xml_string = common.read_model('cartpole.xml')
+  if n_poles == 1:
+    return xml_string
+  mjcf = etree.fromstring(xml_string)
+  parent = mjcf.find('./worldbody/body/body')  # Find first pole.
+  # Make chain of poles.
+  for pole_index in range(2, n_poles+1):
+    child = etree.Element('body', name='pole_{}'.format(pole_index),
+                          pos='0 0 1', childclass='pole')
+    etree.SubElement(child, 'joint', name='hinge_{}'.format(pole_index))
+    etree.SubElement(child, 'geom', name='pole_{}'.format(pole_index))
+    parent.append(child)
+    parent = child
+  # Move plane down.
+  floor = mjcf.find('./worldbody/geom')
+  floor.set('pos', '0 0 {}'.format(1 - n_poles - .05))
+  # Move cameras back.
+  cameras = mjcf.findall('./worldbody/camera')
+  cameras[0].set('pos', '0 {} 1'.format(-1 - 2*n_poles))
+  cameras[1].set('pos', '0 {} 2'.format(-2*n_poles))
+  return etree.tostring(mjcf, pretty_print=True)
+
+
+class Physics(mujoco.Physics):
+  """Physics simulation with additional features for the Cartpole domain."""
+
+  def cart_position(self):
+    """Returns the position of the cart."""
+    return self.named.data.qpos['slider'][0]
+
+  def angular_vel(self):
+    """Returns the angular velocity of the pole."""
+    return self.data.qvel[1:]
+
+  def pole_angle_cosine(self):
+    """Returns the cosine of the pole angle."""
+    return self.named.data.xmat[2:, 'zz']
+
+  def bounded_position(self):
+    """Returns the state, with pole angle split into sin/cos."""
+    return np.hstack((self.cart_position(),
+                      self.named.data.xmat[2:, ['zz', 'xz']].ravel()))
+
+
+class Balance(base.Task):
+  """A Cartpole `Task` to balance the pole.
+
+  State is initialized either close to the target configuration or at a random
+  configuration.
+  """
+  _CART_RANGE = (-.25, .25)
+  _ANGLE_COSINE_RANGE = (.995, 1)
+
+  def __init__(self, swing_up, sparse, random=None):
+    """Initializes an instance of `Balance`.
+
+    Args:
+      swing_up: A `bool`, which if `True` sets the cart to the middle of the
+        slider and the pole pointing towards the ground. Otherwise, sets the
+        cart to a random position on the slider and the pole to a random
+        near-vertical position.
+      sparse: A `bool`, whether to return a sparse or a smooth reward.
+      random: Optional, either a `numpy.random.RandomState` instance, an
+        integer seed for creating a new `RandomState`, or None to select a seed
+        automatically (default).
+    """
+    self._sparse = sparse
+    self._swing_up = swing_up
+    super(Balance, self).__init__(random=random)
+
+  def initialize_episode(self, physics):
+    """Sets the state of the environment at the start of each episode.
+
+    Initializes the cart and pole according to `swing_up`, and in both cases
+    adds a small random initial velocity to break symmetry.
+
+    Args:
+      physics: An instance of `Physics`.
+    """
+    nv = physics.model.nv
+    if self._swing_up:
+      physics.named.data.qpos['slider'] = .01*self.random.randn()
+      physics.named.data.qpos['hinge_1'] = np.pi + .01*self.random.randn()
+      physics.named.data.qpos[2:] = .1*self.random.randn(nv - 2)
+    else:
+      physics.named.data.qpos['slider'] = self.random.uniform(-.1, .1)
+      physics.named.data.qpos[1:] = self.random.uniform(-.034, .034, nv - 1)
+    physics.named.data.qvel[:] = 0.01 * self.random.randn(physics.model.nv)
+    super(Balance, self).initialize_episode(physics)
+
+  def get_observation(self, physics):
+    """Returns an observation of the (bounded) physics state."""
+    obs = collections.OrderedDict()
+    obs['position'] = physics.bounded_position()
+    obs['velocity'] = physics.velocity()
+    return obs
+
+  def _get_reward(self, physics, sparse):
+    if sparse:
+      cart_in_bounds = rewards.tolerance(physics.cart_position(),
+                                         self._CART_RANGE)
+      angle_in_bounds = rewards.tolerance(physics.pole_angle_cosine(),
+                                          self._ANGLE_COSINE_RANGE).prod()
+      return cart_in_bounds * angle_in_bounds
+    else:
+      upright = (physics.pole_angle_cosine() + 1) / 2
+      centered = rewards.tolerance(physics.cart_position(), margin=2)
+      centered = (1 + centered) / 2
+      small_control = rewards.tolerance(physics.control(), margin=1,
+                                        value_at_margin=0,
+                                        sigmoid='quadratic')[0]
+      small_control = (4 + small_control) / 5
+      small_velocity = rewards.tolerance(physics.angular_vel(), margin=5).min()
+      small_velocity = (1 + small_velocity) / 2
+      return upright.mean() * small_control * small_velocity * centered
+
+  def get_reward(self, physics):
+    """Returns a sparse or a smooth reward, as specified in the constructor."""
+    return self._get_reward(physics, sparse=self._sparse)
--- a/DPI/local_dm_control_suite/cartpole.xml
+++ b/DPI/local_dm_control_suite/cartpole.xml
@ -0,0 +1,37 @@
+<mujoco model="cart-pole">
+  <include file="./common/skybox.xml"/>
+  <include file="./common/visual.xml"/>
+  <include file="./common/materials.xml"/>
+
+  <option timestep="0.01" integrator="RK4">
+    <flag contact="disable" energy="enable"/>
+  </option>
+
+  <default>
+    <default class="pole">
+      <joint type="hinge" axis="0 1 0"  damping="2e-6"/>
+      <geom type="capsule" fromto="0 0 0 0 0 1" size="0.045" material="self" mass=".1"/>
+    </default>
+  </default>
+
+  <worldbody>
+    <light name="light" pos="0 0 6"/>
+    <camera name="fixed" pos="0 -4 1" zaxis="0 -1 0"/>
+    <camera name="lookatcart" mode="targetbody" target="cart" pos="0 -2 2"/>
+    <geom name="floor" pos="0 0 -.05" size="4 4 .2" type="plane" material="grid"/>
+    <geom name="rail1" type="capsule" pos="0  .07 1" zaxis="1 0 0" size="0.02 2" material="decoration" />
+    <geom name="rail2" type="capsule" pos="0 -.07 1" zaxis="1 0 0" size="0.02 2" material="decoration" />
+    <body name="cart" pos="0 0 1">
+      <joint name="slider" type="slide" limited="true" axis="1 0 0" range="-1.8 1.8" solreflimit=".08 1" damping="5e-4"/>
+      <geom name="cart" type="box" size="0.2 0.15 0.1" material="self"  mass="1"/>
+      <body name="pole_1" childclass="pole">
+        <joint name="hinge_1"/>
+        <geom name="pole_1"/>
+      </body>
+    </body>
+  </worldbody>
+
+  <actuator>
+    <motor name="slide" joint="slider" gear="10" ctrllimited="true" ctrlrange="-1 1" />
+  </actuator>
+</mujoco>
--- a/DPI/local_dm_control_suite/cheetah.py
+++ b/DPI/local_dm_control_suite/cheetah.py
@ -0,0 +1,97 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Cheetah Domain."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+from dm_control import mujoco
+from dm_control.rl import control
+from local_dm_control_suite import base
+from local_dm_control_suite import common
+from dm_control.utils import containers
+from dm_control.utils import rewards
+
+
+# How long the simulation will run, in seconds.
+_DEFAULT_TIME_LIMIT = 10
+
+# Running speed above which reward is 1.
+_RUN_SPEED = 10
+
+SUITE = containers.TaggedTasks()
+
+
+def get_model_and_assets():
+  """Returns a tuple containing the model XML string and a dict of assets."""
+  return common.read_model('cheetah.xml'), common.ASSETS
+
+
+@SUITE.add('benchmarking')
+def run(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Returns the run task."""
+  physics = Physics.from_xml_string(*get_model_and_assets())
+  task = Cheetah(random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(physics, task, time_limit=time_limit,
+                             **environment_kwargs)
+
+
+class Physics(mujoco.Physics):
+  """Physics simulation with additional features for the Cheetah domain."""
+
+  def speed(self):
+    """Returns the horizontal speed of the Cheetah."""
+    return self.named.data.sensordata['torso_subtreelinvel'][0]
+
+
+class Cheetah(base.Task):
+  """A `Task` to train a running Cheetah."""
+
+  def initialize_episode(self, physics):
+    """Sets the state of the environment at the start of each episode."""
+    # The indexing below assumes that all joints have a single DOF.
+    assert physics.model.nq == physics.model.njnt
+    is_limited = physics.model.jnt_limited == 1
+    lower, upper = physics.model.jnt_range[is_limited].T
+    physics.data.qpos[is_limited] = self.random.uniform(lower, upper)
+
+    # Stabilize the model before the actual simulation.
+    for _ in range(200):
+      physics.step()
+
+    physics.data.time = 0
+    self._timeout_progress = 0
+    super(Cheetah, self).initialize_episode(physics)
+
+  def get_observation(self, physics):
+    """Returns an observation of the state, ignoring horizontal position."""
+    obs = collections.OrderedDict()
+    # Ignores horizontal position to maintain translational invariance.
+    obs['position'] = physics.data.qpos[1:].copy()
+    obs['velocity'] = physics.velocity()
+    return obs
+
+  def get_reward(self, physics):
+    """Returns a reward to the agent."""
+    return rewards.tolerance(physics.speed(),
+                             bounds=(_RUN_SPEED, float('inf')),
+                             margin=_RUN_SPEED,
+                             value_at_margin=0,
+                             sigmoid='linear')
--- a/DPI/local_dm_control_suite/cheetah.xml
+++ b/DPI/local_dm_control_suite/cheetah.xml
@ -0,0 +1,73 @@
+<mujoco model="cheetah">
+  <include file="./common/skybox.xml"/>
+  <include file="./common/visual.xml"/>
+  <include file="./common/materials_white_floor.xml"/>
+
+  <compiler settotalmass="14"/>
+
+  <default>
+    <default class="cheetah">
+      <joint limited="true" damping=".01" armature=".1" stiffness="8" type="hinge" axis="0 1 0"/>
+      <geom contype="1" conaffinity="1" condim="3" friction=".4 .1 .1" material="self"/>
+    </default>
+    <default class="free">
+      <joint limited="false" damping="0" armature="0" stiffness="0"/>
+    </default>
+    <motor ctrllimited="true" ctrlrange="-1 1"/>
+  </default>
+
+  <statistic center="0 0 .7" extent="2"/>
+
+  <option timestep="0.01"/>
+
+  <worldbody>
+    <geom name="ground" type="plane" conaffinity="1" pos="98 0 0" size="100 .8 .5" material="grid"/>
+    <body name="torso" pos="0 0 .7" childclass="cheetah">
+      <light name="light" pos="0 0 2" mode="trackcom"/>
+      <camera name="side" pos="0 -3 0" quat="0.707 0.707 0 0" mode="trackcom"/>
+      <camera name="back" pos="-1.8 -1.3 0.8" xyaxes="0.45 -0.9 0 0.3 0.15 0.94" mode="trackcom"/>
+      <joint name="rootx" type="slide" axis="1 0 0" class="free"/>
+      <joint name="rootz" type="slide" axis="0 0 1" class="free"/>
+      <joint name="rooty" type="hinge" axis="0 1 0" class="free"/>
+      <geom name="torso" type="capsule" fromto="-.5 0 0 .5 0 0" size="0.046"/>
+      <geom name="head" type="capsule" pos=".6 0 .1" euler="0 50 0" size="0.046 .15"/>
+      <body name="bthigh" pos="-.5 0 0">
+        <joint name="bthigh" range="-30 60" stiffness="240" damping="6"/>
+        <geom name="bthigh" type="capsule" pos=".1 0 -.13" euler="0 -218 0" size="0.046 .145"/>
+        <body name="bshin" pos=".16 0 -.25">
+          <joint name="bshin" range="-50 50" stiffness="180" damping="4.5"/>
+          <geom name="bshin" type="capsule" pos="-.14 0 -.07" euler="0 -116 0" size="0.046 .15"/>
+          <body name="bfoot" pos="-.28 0 -.14">
+            <joint name="bfoot" range="-230 50" stiffness="120" damping="3"/>
+            <geom name="bfoot" type="capsule" pos=".03 0 -.097" euler="0 -15 0" size="0.046 .094"/>
+          </body>
+        </body>
+      </body>
+      <body name="fthigh" pos=".5 0 0">
+        <joint name="fthigh" range="-57 .40" stiffness="180" damping="4.5"/>
+        <geom name="fthigh" type="capsule" pos="-.07 0 -.12" euler="0 30 0" size="0.046 .133"/>
+        <body name="fshin" pos="-.14 0 -.24">
+          <joint name="fshin" range="-70 50" stiffness="120" damping="3"/>
+          <geom name="fshin" type="capsule" pos=".065 0 -.09" euler="0 -34 0" size="0.046 .106"/>
+          <body name="ffoot" pos=".13 0 -.18">
+            <joint name="ffoot" range="-28 28" stiffness="60" damping="1.5"/>
+            <geom name="ffoot" type="capsule" pos=".045 0 -.07" euler="0 -34 0" size="0.046 .07"/>
+          </body>
+        </body>
+      </body>
+    </body>
+  </worldbody>
+
+  <sensor>
+    <subtreelinvel name="torso_subtreelinvel" body="torso"/>
+  </sensor>
+
+  <actuator>
+    <motor name="bthigh" joint="bthigh" gear="120" />
+    <motor name="bshin" joint="bshin" gear="90" />
+    <motor name="bfoot" joint="bfoot" gear="60" />
+    <motor name="fthigh" joint="fthigh" gear="90" />
+    <motor name="fshin" joint="fshin" gear="60" />
+    <motor name="ffoot" joint="ffoot" gear="30" />
+  </actuator>
+</mujoco>
--- a/DPI/local_dm_control_suite/common/init.py
+++ b/DPI/local_dm_control_suite/common/init.py
@ -0,0 +1,39 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Functions to manage the common assets for domains."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+from dm_control.utils import io as resources
+
+_SUITE_DIR = os.path.dirname(os.path.dirname(__file__))
+_FILENAMES = [
+    "./common/materials.xml",
+    "./common/materials_white_floor.xml",
+    "./common/skybox.xml",
+    "./common/visual.xml",
+]
+
+ASSETS = {filename: resources.GetResource(os.path.join(_SUITE_DIR, filename))
+          for filename in _FILENAMES}
+
+
+def read_model(model_filename):
+  """Reads a model XML file and returns its contents as a string."""
+  return resources.GetResource(os.path.join(_SUITE_DIR, model_filename))
--- a/DPI/local_dm_control_suite/common/materials.xml
+++ b/DPI/local_dm_control_suite/common/materials.xml
@ -0,0 +1,23 @@
+<!--
+Common textures, colors and materials to be used throughout this suite. Some
+materials such as xxx_highlight are activated on occurence of certain events,
+for example receiving a positive reward.
+-->
+<mujoco>
+  <asset>
+    <texture name="grid" type="2d" builtin="checker" rgb1=".1 .1 .4" rgb2=".2 .2 .8" width="300" height="300" mark="edge" markrgb=".1 .1 .4"/>
+    <material name="grid" texture="grid" texrepeat="1 1" texuniform="true" reflectance=".2"/>
+    <material name="self" rgba=".7 .5 .3 1"/>
+    <material name="self_default" rgba=".7 .5 .3 1"/>
+    <material name="self_highlight" rgba="0 .5 .3 1"/>
+    <material name="effector" rgba=".7 .4 .2 1"/>
+    <material name="effector_default" rgba=".7 .4 .2 1"/>
+    <material name="effector_highlight" rgba="0 .5 .3 1"/>
+    <material name="decoration" rgba=".7 .5 .3 1"/>
+    <material name="eye" rgba="0 .2 1 1"/>
+    <material name="target" rgba=".6 .3 .3 1"/>
+    <material name="target_default" rgba=".6 .3 .3 1"/>
+    <material name="target_highlight" rgba=".6 .3 .3 .4"/>
+    <material name="site" rgba=".5 .5 .5 .3"/>
+  </asset>
+</mujoco>
--- a/DPI/local_dm_control_suite/common/materials_white_floor.xml
+++ b/DPI/local_dm_control_suite/common/materials_white_floor.xml
@ -0,0 +1,23 @@
+<!--
+Common textures, colors and materials to be used throughout this suite. Some
+materials such as xxx_highlight are activated on occurence of certain events,
+for example receiving a positive reward.
+-->
+<mujoco>
+  <asset>
+    <texture name="grid" type="2d" builtin="checker" rgb1=".1 .1 .1" rgb2=".2 .2 .2" width="300" height="300" mark="edge" markrgb=".1 .1 .1"/>
+    <material name="grid" texture="grid" texrepeat="1 1" texuniform="true" reflectance=".2"/>
+    <material name="self" rgba=".7 .5 .3 1"/>
+    <material name="self_default" rgba=".7 .5 .3 1"/>
+    <material name="self_highlight" rgba="0 .5 .3 1"/>
+    <material name="effector" rgba=".7 .4 .2 1"/>
+    <material name="effector_default" rgba=".7 .4 .2 1"/>
+    <material name="effector_highlight" rgba="0 .5 .3 1"/>
+    <material name="decoration" rgba=".3 .5 .7 1"/>
+    <material name="eye" rgba="0 .2 1 1"/>
+    <material name="target" rgba=".6 .3 .3 1"/>
+    <material name="target_default" rgba=".6 .3 .3 1"/>
+    <material name="target_highlight" rgba=".6 .3 .3 .4"/>
+    <material name="site" rgba=".5 .5 .5 .3"/>
+  </asset>
+</mujoco>
--- a/DPI/local_dm_control_suite/common/skybox.xml
+++ b/DPI/local_dm_control_suite/common/skybox.xml
@ -0,0 +1,6 @@
+<mujoco>
+  <asset>
+      <texture name="skybox" type="skybox" builtin="gradient" rgb1=".4 .6 .8" rgb2="0 0 0"
+               width="800" height="800" mark="random" markrgb="0 0 0"/>
+  </asset>
+</mujoco>
--- a/DPI/local_dm_control_suite/common/visual.xml
+++ b/DPI/local_dm_control_suite/common/visual.xml
@ -0,0 +1,7 @@
+<mujoco>
+  <visual>
+    <headlight ambient=".4 .4 .4" diffuse=".8 .8 .8" specular="0.1 0.1 0.1"/>
+    <map znear=".01"/>
+    <quality shadowsize="2048"/>
+  </visual>
+</mujoco>
--- a/DPI/local_dm_control_suite/demos/mocap_demo.py
+++ b/DPI/local_dm_control_suite/demos/mocap_demo.py
@ -0,0 +1,84 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Demonstration of amc parsing for CMU mocap database.
+
+To run the demo, supply a path to a `.amc` file:
+
+    python mocap_demo --filename='path/to/mocap.amc'
+
+CMU motion capture clips are available at mocap.cs.cmu.edu
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import time
+# Internal dependencies.
+
+from absl import app
+from absl import flags
+
+from local_dm_control_suite import humanoid_CMU
+from dm_control.suite.utils import parse_amc
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+FLAGS = flags.FLAGS
+flags.DEFINE_string('filename', None, 'amc file to be converted.')
+flags.DEFINE_integer('max_num_frames', 90,
+                     'Maximum number of frames for plotting/playback')
+
+
+def main(unused_argv):
+  env = humanoid_CMU.stand()
+
+  # Parse and convert specified clip.
+  converted = parse_amc.convert(FLAGS.filename,
+                                env.physics, env.control_timestep())
+
+  max_frame = min(FLAGS.max_num_frames, converted.qpos.shape[1] - 1)
+
+  width = 480
+  height = 480
+  video = np.zeros((max_frame, height, 2 * width, 3), dtype=np.uint8)
+
+  for i in range(max_frame):
+    p_i = converted.qpos[:, i]
+    with env.physics.reset_context():
+      env.physics.data.qpos[:] = p_i
+    video[i] = np.hstack([env.physics.render(height, width, camera_id=0),
+                          env.physics.render(height, width, camera_id=1)])
+
+  tic = time.time()
+  for i in range(max_frame):
+    if i == 0:
+      img = plt.imshow(video[i])
+    else:
+      img.set_data(video[i])
+    toc = time.time()
+    clock_dt = toc - tic
+    tic = time.time()
+    # Real-time playback not always possible as clock_dt > .03
+    plt.pause(max(0.01, 0.03 - clock_dt))  # Need min display time > 0.0.
+    plt.draw()
+  plt.waitforbuttonpress()
+
+
+if __name__ == '__main__':
+  flags.mark_flag_as_required('filename')
+  app.run(main)
--- a/DPI/local_dm_control_suite/demos/zeros.amc
+++ b/DPI/local_dm_control_suite/demos/zeros.amc
@ -0,0 +1,213 @@
+#DUMMY AMC for testing
+:FULLY-SPECIFIED
+:DEGREES
+1
+root 0 0 0 0 0 0
+lowerback 0 0 0
+upperback 0 0 0
+thorax 0 0 0
+lowerneck 0 0 0
+upperneck 0 0 0
+head 0 0 0
+rclavicle 0 0
+rhumerus 0 0 0
+rradius 0
+rwrist 0
+rhand 0 0
+rfingers 0
+rthumb 0 0
+lclavicle 0 0
+lhumerus 0 0 0
+lradius 0
+lwrist 0
+lhand 0 0
+lfingers 0
+lthumb 0 0
+rfemur 0 0 0
+rtibia 0
+rfoot 0 0
+rtoes 0
+lfemur 0 0 0
+ltibia 0
+lfoot 0 0
+ltoes 0
+2
+root 0 0 0 0 0 0
+lowerback 0 0 0
+upperback 0 0 0
+thorax 0 0 0
+lowerneck 0 0 0
+upperneck 0 0 0
+head 0 0 0
+rclavicle 0 0
+rhumerus 0 0 0
+rradius 0
+rwrist 0
+rhand 0 0
+rfingers 0
+rthumb 0 0
+lclavicle 0 0
+lhumerus 0 0 0
+lradius 0
+lwrist 0
+lhand 0 0
+lfingers 0
+lthumb 0 0
+rfemur 0 0 0
+rtibia 0
+rfoot 0 0
+rtoes 0
+lfemur 0 0 0
+ltibia 0
+lfoot 0 0
+ltoes 0
+3
+root 0 0 0 0 0 0
+lowerback 0 0 0
+upperback 0 0 0
+thorax 0 0 0
+lowerneck 0 0 0
+upperneck 0 0 0
+head 0 0 0
+rclavicle 0 0
+rhumerus 0 0 0
+rradius 0
+rwrist 0
+rhand 0 0
+rfingers 0
+rthumb 0 0
+lclavicle 0 0
+lhumerus 0 0 0
+lradius 0
+lwrist 0
+lhand 0 0
+lfingers 0
+lthumb 0 0
+rfemur 0 0 0
+rtibia 0
+rfoot 0 0
+rtoes 0
+lfemur 0 0 0
+ltibia 0
+lfoot 0 0
+ltoes 0
+4
+root 0 0 0 0 0 0
+lowerback 0 0 0
+upperback 0 0 0
+thorax 0 0 0
+lowerneck 0 0 0
+upperneck 0 0 0
+head 0 0 0
+rclavicle 0 0
+rhumerus 0 0 0
+rradius 0
+rwrist 0
+rhand 0 0
+rfingers 0
+rthumb 0 0
+lclavicle 0 0
+lhumerus 0 0 0
+lradius 0
+lwrist 0
+lhand 0 0
+lfingers 0
+lthumb 0 0
+rfemur 0 0 0
+rtibia 0
+rfoot 0 0
+rtoes 0
+lfemur 0 0 0
+ltibia 0
+lfoot 0 0
+ltoes 0
+5
+root 0 0 0 0 0 0
+lowerback 0 0 0
+upperback 0 0 0
+thorax 0 0 0
+lowerneck 0 0 0
+upperneck 0 0 0
+head 0 0 0
+rclavicle 0 0
+rhumerus 0 0 0
+rradius 0
+rwrist 0
+rhand 0 0
+rfingers 0
+rthumb 0 0
+lclavicle 0 0
+lhumerus 0 0 0
+lradius 0
+lwrist 0
+lhand 0 0
+lfingers 0
+lthumb 0 0
+rfemur 0 0 0
+rtibia 0
+rfoot 0 0
+rtoes 0
+lfemur 0 0 0
+ltibia 0
+lfoot 0 0
+ltoes 0
+6
+root 0 0 0 0 0 0
+lowerback 0 0 0
+upperback 0 0 0
+thorax 0 0 0
+lowerneck 0 0 0
+upperneck 0 0 0
+head 0 0 0
+rclavicle 0 0
+rhumerus 0 0 0
+rradius 0
+rwrist 0
+rhand 0 0
+rfingers 0
+rthumb 0 0
+lclavicle 0 0
+lhumerus 0 0 0
+lradius 0
+lwrist 0
+lhand 0 0
+lfingers 0
+lthumb 0 0
+rfemur 0 0 0
+rtibia 0
+rfoot 0 0
+rtoes 0
+lfemur 0 0 0
+ltibia 0
+lfoot 0 0
+ltoes 0
+7
+root 0 0 0 0 0 0
+lowerback 0 0 0
+upperback 0 0 0
+thorax 0 0 0
+lowerneck 0 0 0
+upperneck 0 0 0
+head 0 0 0
+rclavicle 0 0
+rhumerus 0 0 0
+rradius 0
+rwrist 0
+rhand 0 0
+rfingers 0
+rthumb 0 0
+lclavicle 0 0
+lhumerus 0 0 0
+lradius 0
+lwrist 0
+lhand 0 0
+lfingers 0
+lthumb 0 0
+rfemur 0 0 0
+rtibia 0
+rfoot 0 0
+rtoes 0
+lfemur 0 0 0
+ltibia 0
+lfoot 0 0
+ltoes 0
--- a/DPI/local_dm_control_suite/explore.py
+++ b/DPI/local_dm_control_suite/explore.py
@ -0,0 +1,84 @@
+# Copyright 2018 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""Control suite environments explorer."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl import app
+from absl import flags
+from dm_control import suite
+from dm_control.suite.wrappers import action_noise
+from six.moves import input
+
+from dm_control import viewer
+
+
+_ALL_NAMES = ['.'.join(domain_task) for domain_task in suite.ALL_TASKS]
+
+flags.DEFINE_enum('environment_name', None, _ALL_NAMES,
+                  'Optional \'domain_name.task_name\' pair specifying the '
+                  'environment to load. If unspecified a prompt will appear to '
+                  'select one.')
+flags.DEFINE_bool('timeout', True, 'Whether episodes should have a time limit.')
+flags.DEFINE_bool('visualize_reward', True,
+                  'Whether to vary the colors of geoms according to the '
+                  'current reward value.')
+flags.DEFINE_float('action_noise', 0.,
+                   'Standard deviation of Gaussian noise to apply to actions, '
+                   'expressed as a fraction of the max-min range for each '
+                   'action dimension. Defaults to 0, i.e. no noise.')
+FLAGS = flags.FLAGS
+
+
+def prompt_environment_name(prompt, values):
+  environment_name = None
+  while not environment_name:
+    environment_name = input(prompt)
+    if not environment_name or values.index(environment_name) < 0:
+      print('"%s" is not a valid environment name.' % environment_name)
+      environment_name = None
+  return environment_name
+
+
+def main(argv):
+  del argv
+  environment_name = FLAGS.environment_name
+  if environment_name is None:
+    print('\n  '.join(['Available environments:'] + _ALL_NAMES))
+    environment_name = prompt_environment_name(
+        'Please select an environment name: ', _ALL_NAMES)
+
+  index = _ALL_NAMES.index(environment_name)
+  domain_name, task_name = suite.ALL_TASKS[index]
+
+  task_kwargs = {}
+  if not FLAGS.timeout:
+    task_kwargs['time_limit'] = float('inf')
+
+  def loader():
+    env = suite.load(
+        domain_name=domain_name, task_name=task_name, task_kwargs=task_kwargs)
+    env.task.visualize_reward = FLAGS.visualize_reward
+    if FLAGS.action_noise > 0:
+      env = action_noise.Wrapper(env, scale=FLAGS.action_noise)
+    return env
+
+  viewer.launch(loader)
+
+
+if __name__ == '__main__':
+  app.run(main)
--- a/DPI/local_dm_control_suite/finger.py
+++ b/DPI/local_dm_control_suite/finger.py
@ -0,0 +1,217 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Finger Domain."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+from dm_control import mujoco
+from dm_control.rl import control
+from local_dm_control_suite import base
+from local_dm_control_suite import common
+from dm_control.suite.utils import randomizers
+from dm_control.utils import containers
+import numpy as np
+from six.moves import range
+
+_DEFAULT_TIME_LIMIT = 20  # (seconds)
+_CONTROL_TIMESTEP = .02   # (seconds)
+# For TURN tasks, the 'tip' geom needs to enter a spherical target of sizes:
+_EASY_TARGET_SIZE = 0.07
+_HARD_TARGET_SIZE = 0.03
+# Initial spin velocity for the Stop task.
+_INITIAL_SPIN_VELOCITY = 100
+# Spinning slower than this value (radian/second) is considered stopped.
+_STOP_VELOCITY = 1e-6
+# Spinning faster than this value (radian/second) is considered spinning.
+_SPIN_VELOCITY = 15.0
+
+
+SUITE = containers.TaggedTasks()
+
+
+def get_model_and_assets():
+  """Returns a tuple containing the model XML string and a dict of assets."""
+  return common.read_model('finger.xml'), common.ASSETS
+
+
+@SUITE.add('benchmarking')
+def spin(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Returns the Spin task."""
+  physics = Physics.from_xml_string(*get_model_and_assets())
+  task = Spin(random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
+      **environment_kwargs)
+
+
+@SUITE.add('benchmarking')
+def turn_easy(time_limit=_DEFAULT_TIME_LIMIT, random=None,
+              environment_kwargs=None):
+  """Returns the easy Turn task."""
+  physics = Physics.from_xml_string(*get_model_and_assets())
+  task = Turn(target_radius=_EASY_TARGET_SIZE, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
+      **environment_kwargs)
+
+
+@SUITE.add('benchmarking')
+def turn_hard(time_limit=_DEFAULT_TIME_LIMIT, random=None,
+              environment_kwargs=None):
+  """Returns the hard Turn task."""
+  physics = Physics.from_xml_string(*get_model_and_assets())
+  task = Turn(target_radius=_HARD_TARGET_SIZE, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
+      **environment_kwargs)
+
+
+class Physics(mujoco.Physics):
+  """Physics simulation with additional features for the Finger domain."""
+
+  def touch(self):
+    """Returns logarithmically scaled signals from the two touch sensors."""
+    return np.log1p(self.named.data.sensordata[['touchtop', 'touchbottom']])
+
+  def hinge_velocity(self):
+    """Returns the velocity of the hinge joint."""
+    return self.named.data.sensordata['hinge_velocity']
+
+  def tip_position(self):
+    """Returns the (x,z) position of the tip relative to the hinge."""
+    return (self.named.data.sensordata['tip'][[0, 2]] -
+            self.named.data.sensordata['spinner'][[0, 2]])
+
+  def bounded_position(self):
+    """Returns the positions, with the hinge angle replaced by tip position."""
+    return np.hstack((self.named.data.sensordata[['proximal', 'distal']],
+                      self.tip_position()))
+
+  def velocity(self):
+    """Returns the velocities (extracted from sensordata)."""
+    return self.named.data.sensordata[['proximal_velocity',
+                                       'distal_velocity',
+                                       'hinge_velocity']]
+
+  def target_position(self):
+    """Returns the (x,z) position of the target relative to the hinge."""
+    return (self.named.data.sensordata['target'][[0, 2]] -
+            self.named.data.sensordata['spinner'][[0, 2]])
+
+  def to_target(self):
+    """Returns the vector from the tip to the target."""
+    return self.target_position() - self.tip_position()
+
+  def dist_to_target(self):
+    """Returns the signed distance to the target surface, negative is inside."""
+    return (np.linalg.norm(self.to_target()) -
+            self.named.model.site_size['target', 0])
+
+
+class Spin(base.Task):
+  """A Finger `Task` to spin the stopped body."""
+
+  def __init__(self, random=None):
+    """Initializes a new `Spin` instance.
+
+    Args:
+      random: Optional, either a `numpy.random.RandomState` instance, an
+        integer seed for creating a new `RandomState`, or None to select a seed
+        automatically (default).
+    """
+    super(Spin, self).__init__(random=random)
+
+  def initialize_episode(self, physics):
+    physics.named.model.site_rgba['target', 3] = 0
+    physics.named.model.site_rgba['tip', 3] = 0
+    physics.named.model.dof_damping['hinge'] = .03
+    _set_random_joint_angles(physics, self.random)
+    super(Spin, self).initialize_episode(physics)
+
+  def get_observation(self, physics):
+    """Returns state and touch sensors, and target info."""
+    obs = collections.OrderedDict()
+    obs['position'] = physics.bounded_position()
+    obs['velocity'] = physics.velocity()
+    obs['touch'] = physics.touch()
+    return obs
+
+  def get_reward(self, physics):
+    """Returns a sparse reward."""
+    return float(physics.hinge_velocity() <= -_SPIN_VELOCITY)
+
+
+class Turn(base.Task):
+  """A Finger `Task` to turn the body to a target angle."""
+
+  def __init__(self, target_radius, random=None):
+    """Initializes a new `Turn` instance.
+
+    Args:
+      target_radius: Radius of the target site, which specifies the goal angle.
+      random: Optional, either a `numpy.random.RandomState` instance, an
+        integer seed for creating a new `RandomState`, or None to select a seed
+        automatically (default).
+    """
+    self._target_radius = target_radius
+    super(Turn, self).__init__(random=random)
+
+  def initialize_episode(self, physics):
+    target_angle = self.random.uniform(-np.pi, np.pi)
+    hinge_x, hinge_z = physics.named.data.xanchor['hinge', ['x', 'z']]
+    radius = physics.named.model.geom_size['cap1'].sum()
+    target_x = hinge_x + radius * np.sin(target_angle)
+    target_z = hinge_z + radius * np.cos(target_angle)
+    physics.named.model.site_pos['target', ['x', 'z']] = target_x, target_z
+    physics.named.model.site_size['target', 0] = self._target_radius
+
+    _set_random_joint_angles(physics, self.random)
+
+    super(Turn, self).initialize_episode(physics)
+
+  def get_observation(self, physics):
+    """Returns state, touch sensors, and target info."""
+    obs = collections.OrderedDict()
+    obs['position'] = physics.bounded_position()
+    obs['velocity'] = physics.velocity()
+    obs['touch'] = physics.touch()
+    obs['target_position'] = physics.target_position()
+    obs['dist_to_target'] = physics.dist_to_target()
+    return obs
+
+  def get_reward(self, physics):
+    return float(physics.dist_to_target() <= 0)
+
+
+def _set_random_joint_angles(physics, random, max_attempts=1000):
+  """Sets the joints to a random collision-free state."""
+
+  for _ in range(max_attempts):
+    randomizers.randomize_limited_and_rotational_joints(physics, random)
+    # Check for collisions.
+    physics.after_reset()
+    if physics.data.ncon == 0:
+      break
+  else:
+    raise RuntimeError('Could not find a collision-free state '
+                       'after {} attempts'.format(max_attempts))
--- a/DPI/local_dm_control_suite/finger.xml
+++ b/DPI/local_dm_control_suite/finger.xml
@ -0,0 +1,72 @@
+ <mujoco model="finger">
+  <include file="./common/visual.xml"/>
+  <include file="./common/skybox.xml"/>
+  <include file="./common/materials.xml"/>
+
+  <option timestep="0.01" cone="elliptic" iterations="200">
+    <flag gravity="disable"/>
+  </option>
+
+  <default>
+    <geom solimp="0 0.9 0.01" solref=".02 1"/>
+    <joint type="hinge" axis="0 -1 0"/>
+    <motor ctrllimited="true" ctrlrange="-1 1"/>
+    <default class="finger">
+      <joint damping="2.5" limited="true"/>
+      <site type="ellipsoid" size=".025 .03 .025" material="site" group="3"/>
+    </default>
+  </default>
+
+  <worldbody>
+    <light name="light" directional="true" diffuse=".6 .6 .6" pos="0 0 2" specular=".3 .3 .3"/>
+    <geom name="ground" type="plane" pos="0 0 0" size=".6 .2 10" material="grid"/>
+    <camera name="cam0" pos="0 -1 .8" xyaxes="1 0 0 0 1 2"/>
+    <camera name="cam1" pos="0 -1 .4" xyaxes="1 0 0 0 0 1" />
+
+    <body name="proximal" pos="-.2 0 .4" childclass="finger">
+      <geom name="proximal_decoration" type="cylinder" fromto="0 -.033 0 0 .033 0" size=".034" material="decoration"/>
+      <joint name="proximal" range="-110 110" ref="-90"/>
+      <geom name="proximal" type="capsule" material="self" size=".03" fromto="0 0 0 0 0 -.17"/>
+      <body name="distal" pos="0 0 -.18" childclass="finger">
+        <joint name="distal" range="-110 110"/>
+        <geom name="distal" type="capsule" size=".028" material="self" fromto="0 0 0 0 0 -.16" contype="0" conaffinity="0"/>
+        <geom name="fingertip" type="capsule" size=".03" material="effector" fromto="0 0 -.13 0 0 -.161"/>
+        <site name="touchtop" pos=".01 0 -.17"/>
+        <site name="touchbottom" pos="-.01 0 -.17"/>
+      </body>
+    </body>
+
+    <body name="spinner" pos=".2 0 .4">
+      <joint name="hinge" frictionloss=".1" damping=".5"/>
+      <geom name="cap1" type="capsule" size=".04 .09" material="self" pos=".02 0 0"/>
+      <geom name="cap2" type="capsule" size=".04 .09" material="self" pos="-.02 0 0"/>
+      <site name="tip" type="sphere"  size=".02" pos="0 0 .13" material="target"/>
+      <geom name="spinner_decoration" type="cylinder" fromto="0 -.045 0 0 .045 0" size=".02" material="decoration"/>
+    </body>
+
+    <site name="target" type="sphere" size=".03" pos="0 0 .4" material="target"/>
+  </worldbody>
+
+  <actuator>
+    <motor name="proximal" joint="proximal" gear="30"/>
+    <motor name="distal" joint="distal" gear="15"/>
+  </actuator>
+
+  <!-- All finger observations are functions of sensors. This is useful for finite-differencing. -->
+  <sensor>
+    <jointpos name="proximal" joint="proximal"/>
+    <jointpos name="distal" joint="distal"/>
+    <jointvel name="proximal_velocity" joint="proximal"/>
+    <jointvel name="distal_velocity" joint="distal"/>
+    <jointvel name="hinge_velocity" joint="hinge"/>
+    <framepos name="tip" objtype="site" objname="tip"/>
+    <framepos name="target" objtype="site" objname="target"/>
+    <framepos name="spinner" objtype="xbody" objname="spinner"/>
+    <touch name="touchtop" site="touchtop"/>
+    <touch name="touchbottom" site="touchbottom"/>
+    <framepos name="touchtop_pos" objtype="site" objname="touchtop"/>
+    <framepos name="touchbottom_pos" objtype="site" objname="touchbottom"/>
+  </sensor>
+
+</mujoco>
+
--- a/DPI/local_dm_control_suite/fish.py
+++ b/DPI/local_dm_control_suite/fish.py
@ -0,0 +1,176 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Fish Domain."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+from dm_control import mujoco
+from dm_control.rl import control
+from local_dm_control_suite import base
+from local_dm_control_suite import common
+from dm_control.utils import containers
+from dm_control.utils import rewards
+import numpy as np
+
+
+_DEFAULT_TIME_LIMIT = 40
+_CONTROL_TIMESTEP = .04
+_JOINTS = ['tail1',
+           'tail_twist',
+           'tail2',
+           'finright_roll',
+           'finright_pitch',
+           'finleft_roll',
+           'finleft_pitch']
+SUITE = containers.TaggedTasks()
+
+
+def get_model_and_assets():
+  """Returns a tuple containing the model XML string and a dict of assets."""
+  return common.read_model('fish.xml'), common.ASSETS
+
+
+@SUITE.add('benchmarking')
+def upright(time_limit=_DEFAULT_TIME_LIMIT, random=None,
+            environment_kwargs=None):
+  """Returns the Fish Upright task."""
+  physics = Physics.from_xml_string(*get_model_and_assets())
+  task = Upright(random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit,
+      **environment_kwargs)
+
+
+@SUITE.add('benchmarking')
+def swim(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Returns the Fish Swim task."""
+  physics = Physics.from_xml_string(*get_model_and_assets())
+  task = Swim(random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit,
+      **environment_kwargs)
+
+
+class Physics(mujoco.Physics):
+  """Physics simulation with additional features for the Fish domain."""
+
+  def upright(self):
+    """Returns projection from z-axes of torso to the z-axes of worldbody."""
+    return self.named.data.xmat['torso', 'zz']
+
+  def torso_velocity(self):
+    """Returns velocities and angular velocities of the torso."""
+    return self.data.sensordata
+
+  def joint_velocities(self):
+    """Returns the joint velocities."""
+    return self.named.data.qvel[_JOINTS]
+
+  def joint_angles(self):
+    """Returns the joint positions."""
+    return self.named.data.qpos[_JOINTS]
+
+  def mouth_to_target(self):
+    """Returns a vector, from mouth to target in local coordinate of mouth."""
+    data = self.named.data
+    mouth_to_target_global = data.geom_xpos['target'] - data.geom_xpos['mouth']
+    return mouth_to_target_global.dot(data.geom_xmat['mouth'].reshape(3, 3))
+
+
+class Upright(base.Task):
+  """A Fish `Task` for getting the torso upright with smooth reward."""
+
+  def __init__(self, random=None):
+    """Initializes an instance of `Upright`.
+
+    Args:
+      random: Either an existing `numpy.random.RandomState` instance, an
+        integer seed for creating a new `RandomState`, or None to select a seed
+        automatically.
+    """
+    super(Upright, self).__init__(random=random)
+
+  def initialize_episode(self, physics):
+    """Randomizes the tail and fin angles and the orientation of the Fish."""
+    quat = self.random.randn(4)
+    physics.named.data.qpos['root'][3:7] = quat / np.linalg.norm(quat)
+    for joint in _JOINTS:
+      physics.named.data.qpos[joint] = self.random.uniform(-.2, .2)
+    # Hide the target. It's irrelevant for this task.
+    physics.named.model.geom_rgba['target', 3] = 0
+    super(Upright, self).initialize_episode(physics)
+
+  def get_observation(self, physics):
+    """Returns an observation of joint angles, velocities and uprightness."""
+    obs = collections.OrderedDict()
+    obs['joint_angles'] = physics.joint_angles()
+    obs['upright'] = physics.upright()
+    obs['velocity'] = physics.velocity()
+    return obs
+
+  def get_reward(self, physics):
+    """Returns a smooth reward."""
+    return rewards.tolerance(physics.upright(), bounds=(1, 1), margin=1)
+
+
+class Swim(base.Task):
+  """A Fish `Task` for swimming with smooth reward."""
+
+  def __init__(self, random=None):
+    """Initializes an instance of `Swim`.
+
+    Args:
+      random: Optional, either a `numpy.random.RandomState` instance, an
+        integer seed for creating a new `RandomState`, or None to select a seed
+        automatically (default).
+    """
+    super(Swim, self).__init__(random=random)
+
+  def initialize_episode(self, physics):
+    """Sets the state of the environment at the start of each episode."""
+
+    quat = self.random.randn(4)
+    physics.named.data.qpos['root'][3:7] = quat / np.linalg.norm(quat)
+    for joint in _JOINTS:
+      physics.named.data.qpos[joint] = self.random.uniform(-.2, .2)
+    # Randomize target position.
+    physics.named.model.geom_pos['target', 'x'] = self.random.uniform(-.4, .4)
+    physics.named.model.geom_pos['target', 'y'] = self.random.uniform(-.4, .4)
+    physics.named.model.geom_pos['target', 'z'] = self.random.uniform(.1, .3)
+    super(Swim, self).initialize_episode(physics)
+
+  def get_observation(self, physics):
+    """Returns an observation of joints, target direction and velocities."""
+    obs = collections.OrderedDict()
+    obs['joint_angles'] = physics.joint_angles()
+    obs['upright'] = physics.upright()
+    obs['target'] = physics.mouth_to_target()
+    obs['velocity'] = physics.velocity()
+    return obs
+
+  def get_reward(self, physics):
+    """Returns a smooth reward."""
+    radii = physics.named.model.geom_size[['mouth', 'target'], 0].sum()
+    in_target = rewards.tolerance(np.linalg.norm(physics.mouth_to_target()),
+                                  bounds=(0, radii), margin=2*radii)
+    is_upright = 0.5 * (physics.upright() + 1)
+    return (7*in_target + is_upright) / 8
--- a/DPI/local_dm_control_suite/fish.xml
+++ b/DPI/local_dm_control_suite/fish.xml
@ -0,0 +1,85 @@
+<mujoco model="fish">
+  <include file="./common/visual.xml"/>
+  <include file="./common/materials.xml"/>
+  <asset>
+      <texture name="skybox" type="skybox" builtin="gradient" rgb1=".4 .6 .8" rgb2="0 0 0" width="800" height="800" mark="random" markrgb="1 1 1"/>
+  </asset>
+
+
+  <option timestep="0.004" density="5000">
+    <flag gravity="disable" constraint="disable"/>
+  </option>
+
+  <default>
+    <general ctrllimited="true"/>
+    <default class="fish">
+      <joint type="hinge" limited="false" range="-60 60" damping="2e-5" solreflimit=".1 1" solimplimit="0 .8 .1"/>
+      <geom material="self"/>
+    </default>
+  </default>
+
+  <worldbody>
+    <camera name="tracking_top" pos="0 0 1" xyaxes="1 0 0 0 1 0" mode="trackcom"/>
+    <camera name="tracking_x" pos="-.3 0 .2" xyaxes="0 -1 0 0.342 0 0.940" fovy="60" mode="trackcom"/>
+    <camera name="tracking_y" pos="0 -.3 .2" xyaxes="1 0 0 0 0.342 0.940" fovy="60" mode="trackcom"/>
+    <camera name="fixed_top" pos="0 0 5.5" fovy="10"/>
+    <geom name="ground" type="plane" size=".5 .5 .1" material="grid"/>
+    <geom name="target" type="sphere" pos="0 .4 .1" size=".04" material="target"/>
+    <body name="torso" pos="0 0 .1" childclass="fish">
+      <light name="light" diffuse=".6 .6 .6" pos="0 0 0.5" dir="0 0 -1" specular=".3 .3 .3" mode="track"/>
+      <joint name="root" type="free" damping="0" limited="false"/>
+      <site name="torso" size=".01" rgba="0 0 0 0"/>
+      <geom name="eye" type="ellipsoid" pos="0 .055 .015" size=".008 .012 .008" euler="-10 0 0" material="eye" mass="0"/>
+      <camera name="eye" pos="0 .06 .02" xyaxes="1 0 0 0 0 1"/>
+      <geom name="mouth" type="capsule" fromto="0 .079 0 0 .07 0" size=".005" material="effector" mass="0"/>
+      <geom name="lower_mouth" type="capsule" fromto="0 .079 -.004 0 .07 -.003" size=".0045" material="effector" mass="0"/>
+      <geom name="torso" type="ellipsoid" size=".01 .08 .04" mass="0"/>
+      <geom name="back_fin" type="ellipsoid" size=".001 .03 .015" pos="0 -.03 .03" material="effector" mass="0"/>
+      <geom name="torso_massive" type="box" size=".002 .06 .03" group="4"/>
+      <body name="tail1" pos="0 -.09 0">
+        <joint name="tail1" axis="0 0 1" pos="0 .01 0"/>
+        <joint name="tail_twist" axis="0 1 0" pos="0 .01 0" range="-30 30"/>
+        <geom name="tail1" type="ellipsoid" size=".001 .008 .016"/>
+        <body name="tail2" pos="0 -.028 0">
+          <joint name="tail2" axis="0 0 1" pos="0 .02 0" stiffness="8e-5"/>
+          <geom name="tail2" type="ellipsoid" size=".001 .018 .035"/>
+        </body>
+      </body>
+      <body name="finright" pos=".01 0 0">
+        <joint name="finright_roll" axis="0 1 0"/>
+        <joint name="finright_pitch" axis="1 0 0" pos="0 .005 0"/>
+        <geom name="finright" type="ellipsoid" pos=".015 0 0" size=".02 .015 .001"  />
+      </body>
+      <body name="finleft" pos="-.01 0 0">
+        <joint name="finleft_roll" axis="0 1 0"/>
+        <joint name="finleft_pitch" axis="1 0 0" pos="0 .005 0"/>
+        <geom name="finleft" type="ellipsoid"  pos="-.015 0 0" size=".02 .015 .001"/>
+      </body>
+    </body>
+  </worldbody>
+
+  <tendon>
+    <fixed name="fins_flap">
+      <joint joint="finleft_roll"  coef="-.5"/>
+      <joint joint="finright_roll" coef=".5"/>
+    </fixed>
+    <fixed name="fins_sym" stiffness="1e-4">
+      <joint joint="finleft_roll"  coef=".5"/>
+      <joint joint="finright_roll" coef=".5"/>
+    </fixed>
+  </tendon>
+
+  <actuator>
+    <position name="tail"           joint="tail1"           ctrlrange="-1 1"    kp="5e-4"/>
+    <position name="tail_twist"     joint="tail_twist"      ctrlrange="-1 1"    kp="1e-4"/>
+    <position name="fins_flap"      tendon="fins_flap"      ctrlrange="-1 1"    kp="3e-4"/>
+    <position name="finleft_pitch"  joint="finleft_pitch"   ctrlrange="-1 1"    kp="1e-4"/>
+    <position name="finright_pitch" joint="finright_pitch"  ctrlrange="-1 1"    kp="1e-4"/>
+  </actuator>
+
+  <sensor>
+    <velocimeter name="velocimeter" site="torso"/>
+    <gyro name="gyro" site="torso"/>
+  </sensor>
+</mujoco>
+
--- a/DPI/local_dm_control_suite/hopper.py
+++ b/DPI/local_dm_control_suite/hopper.py
@ -0,0 +1,138 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Hopper domain."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+from dm_control import mujoco
+from dm_control.rl import control
+from local_dm_control_suite import base
+from local_dm_control_suite import common
+from dm_control.suite.utils import randomizers
+from dm_control.utils import containers
+from dm_control.utils import rewards
+import numpy as np
+
+
+SUITE = containers.TaggedTasks()
+
+_CONTROL_TIMESTEP = .02  # (Seconds)
+
+# Default duration of an episode, in seconds.
+_DEFAULT_TIME_LIMIT = 20
+
+# Minimal height of torso over foot above which stand reward is 1.
+_STAND_HEIGHT = 0.6
+
+# Hopping speed above which hop reward is 1.
+_HOP_SPEED = 2
+
+
+def get_model_and_assets():
+  """Returns a tuple containing the model XML string and a dict of assets."""
+  return common.read_model('hopper.xml'), common.ASSETS
+
+
+@SUITE.add('benchmarking')
+def stand(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Returns a Hopper that strives to stand upright, balancing its pose."""
+  physics = Physics.from_xml_string(*get_model_and_assets())
+  task = Hopper(hopping=False, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
+      **environment_kwargs)
+
+
+@SUITE.add('benchmarking')
+def hop(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Returns a Hopper that strives to hop forward."""
+  physics = Physics.from_xml_string(*get_model_and_assets())
+  task = Hopper(hopping=True, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
+      **environment_kwargs)
+
+
+class Physics(mujoco.Physics):
+  """Physics simulation with additional features for the Hopper domain."""
+
+  def height(self):
+    """Returns height of torso with respect to foot."""
+    return (self.named.data.xipos['torso', 'z'] -
+            self.named.data.xipos['foot', 'z'])
+
+  def speed(self):
+    """Returns horizontal speed of the Hopper."""
+    return self.named.data.sensordata['torso_subtreelinvel'][0]
+
+  def touch(self):
+    """Returns the signals from two foot touch sensors."""
+    return np.log1p(self.named.data.sensordata[['touch_toe', 'touch_heel']])
+
+
+class Hopper(base.Task):
+  """A Hopper's `Task` to train a standing and a jumping Hopper."""
+
+  def __init__(self, hopping, random=None):
+    """Initialize an instance of `Hopper`.
+
+    Args:
+      hopping: Boolean, if True the task is to hop forwards, otherwise it is to
+        balance upright.
+      random: Optional, either a `numpy.random.RandomState` instance, an
+        integer seed for creating a new `RandomState`, or None to select a seed
+        automatically (default).
+    """
+    self._hopping = hopping
+    super(Hopper, self).__init__(random=random)
+
+  def initialize_episode(self, physics):
+    """Sets the state of the environment at the start of each episode."""
+    randomizers.randomize_limited_and_rotational_joints(physics, self.random)
+    self._timeout_progress = 0
+    super(Hopper, self).initialize_episode(physics)
+
+  def get_observation(self, physics):
+    """Returns an observation of positions, velocities and touch sensors."""
+    obs = collections.OrderedDict()
+    # Ignores horizontal position to maintain translational invariance:
+    obs['position'] = physics.data.qpos[1:].copy()
+    obs['velocity'] = physics.velocity()
+    obs['touch'] = physics.touch()
+    return obs
+
+  def get_reward(self, physics):
+    """Returns a reward applicable to the performed task."""
+    standing = rewards.tolerance(physics.height(), (_STAND_HEIGHT, 2))
+    if self._hopping:
+      hopping = rewards.tolerance(physics.speed(),
+                                  bounds=(_HOP_SPEED, float('inf')),
+                                  margin=_HOP_SPEED/2,
+                                  value_at_margin=0.5,
+                                  sigmoid='linear')
+      return standing * hopping
+    else:
+      small_control = rewards.tolerance(physics.control(),
+                                        margin=1, value_at_margin=0,
+                                        sigmoid='quadratic').mean()
+      small_control = (small_control + 4) / 5
+      return standing * small_control
--- a/DPI/local_dm_control_suite/hopper.xml
+++ b/DPI/local_dm_control_suite/hopper.xml
@ -0,0 +1,66 @@
+<mujoco model="planar hopper">
+  <include file="./common/skybox.xml"/>
+  <include file="./common/visual.xml"/>
+  <include file="./common/materials_white_floor.xml"/>
+
+  <statistic extent="2" center="0 0 .5"/>
+
+  <default>
+    <default class="hopper">
+      <joint type="hinge" axis="0 1 0" limited="true" damping=".05" armature=".2"/>
+      <geom type="capsule" material="self"/>
+      <site type="sphere" size="0.05" group="3"/>
+    </default>
+    <default class="free">
+      <joint limited="false" damping="0" armature="0" stiffness="0"/>
+    </default>
+    <motor ctrlrange="-1 1" ctrllimited="true"/>
+  </default>
+
+  <option timestep="0.005"/>
+
+  <worldbody>
+    <camera name="cam0" pos="0 -2.8 0.8" euler="90 0 0" mode="trackcom"/>
+    <camera name="back" pos="-2 -.2 1.2" xyaxes="0.2 -1 0 .5 0 2" mode="trackcom"/>
+    <geom name="floor" type="plane" conaffinity="1" pos="48 0 0" size="50 1 .2" material="grid"/>
+    <body name="torso" pos="0 0 1" childclass="hopper">
+      <light name="top" pos="0 0 2" mode="trackcom"/>
+      <joint name="rootx" type="slide" axis="1 0 0" class="free"/>
+      <joint name="rootz" type="slide" axis="0 0 1" class="free"/>
+      <joint name="rooty" type="hinge" axis="0 1 0" class="free"/>
+      <geom name="torso" fromto="0 0 -.05 0 0 .2" size="0.0653"/>
+      <geom name="nose" fromto=".08 0 .13 .15 0 .14" size="0.03"/>
+      <body name="pelvis" pos="0 0 -.05">
+        <joint name="waist" range="-30 30"/>
+        <geom name="pelvis" fromto="0 0 0 0 0 -.15" size="0.065"/>
+        <body name="thigh" pos="0 0 -.2">
+          <joint name="hip" range="-170 10"/>
+          <geom name="thigh" fromto="0 0 0 0 0 -.33" size="0.04"/>
+          <body name="calf" pos="0 0 -.33">
+            <joint name="knee" range="5 150"/>
+            <geom name="calf" fromto="0 0 0 0 0 -.32" size="0.03"/>
+            <body name="foot" pos="0 0 -.32">
+              <joint name="ankle" range="-45 45"/>
+              <geom name="foot" fromto="-.08 0 0 .17 0 0" size="0.04"/>
+              <site name="touch_toe" pos=".17 0 0"/>
+              <site name="touch_heel" pos="-.08 0 0"/>
+            </body>
+          </body>
+        </body>
+      </body>
+    </body>
+  </worldbody>
+
+  <sensor>
+    <subtreelinvel name="torso_subtreelinvel" body="torso"/>
+    <touch name="touch_toe" site="touch_toe"/>
+    <touch name="touch_heel" site="touch_heel"/>
+  </sensor>
+
+  <actuator>
+    <motor name="waist" joint="waist" gear="30"/>
+    <motor name="hip" joint="hip" gear="40"/>
+    <motor name="knee" joint="knee" gear="30"/>
+    <motor name="ankle" joint="ankle" gear="10"/>
+  </actuator>
+</mujoco>
--- a/DPI/local_dm_control_suite/humanoid.py
+++ b/DPI/local_dm_control_suite/humanoid.py
@ -0,0 +1,211 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Humanoid Domain."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+from dm_control import mujoco
+from dm_control.rl import control
+from local_dm_control_suite import base
+from local_dm_control_suite import common
+from dm_control.suite.utils import randomizers
+from dm_control.utils import containers
+from dm_control.utils import rewards
+import numpy as np
+
+_DEFAULT_TIME_LIMIT = 25
+_CONTROL_TIMESTEP = .025
+
+# Height of head above which stand reward is 1.
+_STAND_HEIGHT = 1.4
+
+# Horizontal speeds above which move reward is 1.
+_WALK_SPEED = 1
+_RUN_SPEED = 10
+
+
+SUITE = containers.TaggedTasks()
+
+
+def get_model_and_assets():
+  """Returns a tuple containing the model XML string and a dict of assets."""
+  return common.read_model('humanoid.xml'), common.ASSETS
+
+
+@SUITE.add('benchmarking')
+def stand(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Returns the Stand task."""
+  physics = Physics.from_xml_string(*get_model_and_assets())
+  task = Humanoid(move_speed=0, pure_state=False, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
+      **environment_kwargs)
+
+
+@SUITE.add('benchmarking')
+def walk(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Returns the Walk task."""
+  physics = Physics.from_xml_string(*get_model_and_assets())
+  task = Humanoid(move_speed=_WALK_SPEED, pure_state=False, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
+      **environment_kwargs)
+
+
+@SUITE.add('benchmarking')
+def run(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Returns the Run task."""
+  physics = Physics.from_xml_string(*get_model_and_assets())
+  task = Humanoid(move_speed=_RUN_SPEED, pure_state=False, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
+      **environment_kwargs)
+
+
+@SUITE.add()
+def run_pure_state(time_limit=_DEFAULT_TIME_LIMIT, random=None,
+                   environment_kwargs=None):
+  """Returns the Run task."""
+  physics = Physics.from_xml_string(*get_model_and_assets())
+  task = Humanoid(move_speed=_RUN_SPEED, pure_state=True, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
+      **environment_kwargs)
+
+
+class Physics(mujoco.Physics):
+  """Physics simulation with additional features for the Walker domain."""
+
+  def torso_upright(self):
+    """Returns projection from z-axes of torso to the z-axes of world."""
+    return self.named.data.xmat['torso', 'zz']
+
+  def head_height(self):
+    """Returns the height of the torso."""
+    return self.named.data.xpos['head', 'z']
+
+  def center_of_mass_position(self):
+    """Returns position of the center-of-mass."""
+    return self.named.data.subtree_com['torso'].copy()
+
+  def center_of_mass_velocity(self):
+    """Returns the velocity of the center-of-mass."""
+    return self.named.data.sensordata['torso_subtreelinvel'].copy()
+
+  def torso_vertical_orientation(self):
+    """Returns the z-projection of the torso orientation matrix."""
+    return self.named.data.xmat['torso', ['zx', 'zy', 'zz']]
+
+  def joint_angles(self):
+    """Returns the state without global orientation or position."""
+    return self.data.qpos[7:].copy()  # Skip the 7 DoFs of the free root joint.
+
+  def extremities(self):
+    """Returns end effector positions in egocentric frame."""
+    torso_frame = self.named.data.xmat['torso'].reshape(3, 3)
+    torso_pos = self.named.data.xpos['torso']
+    positions = []
+    for side in ('left_', 'right_'):
+      for limb in ('hand', 'foot'):
+        torso_to_limb = self.named.data.xpos[side + limb] - torso_pos
+        positions.append(torso_to_limb.dot(torso_frame))
+    return np.hstack(positions)
+
+
+class Humanoid(base.Task):
+  """A humanoid task."""
+
+  def __init__(self, move_speed, pure_state, random=None):
+    """Initializes an instance of `Humanoid`.
+
+    Args:
+      move_speed: A float. If this value is zero, reward is given simply for
+        standing up. Otherwise this specifies a target horizontal velocity for
+        the walking task.
+      pure_state: A bool. Whether the observations consist of the pure MuJoCo
+        state or includes some useful features thereof.
+      random: Optional, either a `numpy.random.RandomState` instance, an
+        integer seed for creating a new `RandomState`, or None to select a seed
+        automatically (default).
+    """
+    self._move_speed = move_speed
+    self._pure_state = pure_state
+    super(Humanoid, self).__init__(random=random)
+
+  def initialize_episode(self, physics):
+    """Sets the state of the environment at the start of each episode.
+
+    Args:
+      physics: An instance of `Physics`.
+
+    """
+    # Find a collision-free random initial configuration.
+    penetrating = True
+    while penetrating:
+      randomizers.randomize_limited_and_rotational_joints(physics, self.random)
+      # Check for collisions.
+      physics.after_reset()
+      penetrating = physics.data.ncon > 0
+    super(Humanoid, self).initialize_episode(physics)
+
+  def get_observation(self, physics):
+    """Returns either the pure state or a set of egocentric features."""
+    obs = collections.OrderedDict()
+    if self._pure_state:
+      obs['position'] = physics.position()
+      obs['velocity'] = physics.velocity()
+    else:
+      obs['joint_angles'] = physics.joint_angles()
+      obs['head_height'] = physics.head_height()
+      obs['extremities'] = physics.extremities()
+      obs['torso_vertical'] = physics.torso_vertical_orientation()
+      obs['com_velocity'] = physics.center_of_mass_velocity()
+      obs['velocity'] = physics.velocity()
+    return obs
+
+  def get_reward(self, physics):
+    """Returns a reward to the agent."""
+    standing = rewards.tolerance(physics.head_height(),
+                                 bounds=(_STAND_HEIGHT, float('inf')),
+                                 margin=_STAND_HEIGHT/4)
+    upright = rewards.tolerance(physics.torso_upright(),
+                                bounds=(0.9, float('inf')), sigmoid='linear',
+                                margin=1.9, value_at_margin=0)
+    stand_reward = standing * upright
+    small_control = rewards.tolerance(physics.control(), margin=1,
+                                      value_at_margin=0,
+                                      sigmoid='quadratic').mean()
+    small_control = (4 + small_control) / 5
+    if self._move_speed == 0:
+      horizontal_velocity = physics.center_of_mass_velocity()[[0, 1]]
+      dont_move = rewards.tolerance(horizontal_velocity, margin=2).mean()
+      return small_control * stand_reward * dont_move
+    else:
+      com_velocity = np.linalg.norm(physics.center_of_mass_velocity()[[0, 1]])
+      move = rewards.tolerance(com_velocity,
+                               bounds=(self._move_speed, float('inf')),
+                               margin=self._move_speed, value_at_margin=0,
+                               sigmoid='linear')
+      move = (5*move + 1) / 6
+      return small_control * stand_reward * move
--- a/DPI/local_dm_control_suite/humanoid.xml
+++ b/DPI/local_dm_control_suite/humanoid.xml
@ -0,0 +1,202 @@
+<mujoco model="humanoid">
+  <include file="./common/skybox.xml"/>
+  <include file="./common/visual.xml"/>
+  <include file="./common/materials.xml"/>
+
+  <statistic extent="2" center="0 0 1"/>
+
+  <option timestep=".005"/>
+
+  <default>
+    <motor ctrlrange="-1 1" ctrllimited="true"/>
+    <default class="body">
+      <geom  type="capsule" condim="1" friction=".7" solimp=".9 .99 .003" solref=".015 1" material="self"/>
+      <joint type="hinge" damping=".2" stiffness="1" armature=".01" limited="true" solimplimit="0 .99 .01"/>
+      <default class="big_joint">
+        <joint damping="5" stiffness="10"/>
+        <default class="big_stiff_joint">
+          <joint stiffness="20"/>
+        </default>
+      </default>
+      <site size=".04" group="3"/>
+      <default class="force-torque">
+        <site type="box" size=".01 .01 .02" rgba="1 0 0 1" />
+      </default>
+      <default class="touch">
+        <site type="capsule" rgba="0 0 1 .3"/>
+      </default>
+    </default>
+  </default>
+
+  <worldbody>
+    <geom name="floor" type="plane" conaffinity="1" size="100 100 .2" material="grid"/>
+    <body name="torso" pos="0 0 1.5" childclass="body">
+      <light name="top" pos="0 0 2" mode="trackcom"/>
+      <camera name="back" pos="-3 0 1" xyaxes="0 -1 0 1 0 2" mode="trackcom"/>
+      <camera name="side" pos="0 -3 1" xyaxes="1 0 0 0 1 2" mode="trackcom"/>
+      <freejoint name="root"/>
+      <site name="root" class="force-torque"/>
+      <geom name="torso" fromto="0 -.07 0 0 .07 0" size=".07"/>
+      <geom name="upper_waist" fromto="-.01 -.06 -.12 -.01 .06 -.12" size=".06"/>
+      <site name="torso" class="touch" type="box" pos="0 0 -.05" size=".075 .14 .13"/>
+      <body name="head" pos="0 0 .19">
+        <geom name="head" type="sphere" size=".09"/>
+        <site name="head" class="touch" type="sphere" size=".091"/>
+        <camera name="egocentric" pos=".09 0 0" xyaxes="0 -1 0 .1 0 1" fovy="80"/>
+      </body>
+      <body name="lower_waist" pos="-.01 0 -.260" quat="1.000 0 -.002 0">
+        <geom name="lower_waist" fromto="0 -.06 0 0 .06 0" size=".06"/>
+        <site name="lower_waist" class="touch" size=".061 .06" zaxis="0 1 0"/>
+        <joint name="abdomen_z" pos="0 0 .065" axis="0 0 1" range="-45 45" class="big_stiff_joint"/>
+        <joint name="abdomen_y" pos="0 0 .065" axis="0 1 0" range="-75 30" class="big_joint"/>
+        <body name="pelvis" pos="0 0 -.165" quat="1.000 0 -.002 0">
+          <joint name="abdomen_x" pos="0 0 .1" axis="1 0 0" range="-35 35" class="big_joint"/>
+          <geom name="butt" fromto="-.02 -.07 0 -.02 .07 0" size=".09"/>
+          <site name="butt" class="touch" size=".091 .07" pos="-.02 0 0" zaxis="0 1 0"/>
+          <body name="right_thigh" pos="0 -.1 -.04">
+            <site name="right_hip" class="force-torque"/>
+            <joint name="right_hip_x" axis="1 0 0" range="-25 5"   class="big_joint"/>
+            <joint name="right_hip_z" axis="0 0 1" range="-60 35"  class="big_joint"/>
+            <joint name="right_hip_y" axis="0 1 0" range="-110 20" class="big_stiff_joint"/>
+            <geom name="right_thigh" fromto="0 0 0 0 .01 -.34" size=".06"/>
+            <site name="right_thigh" class="touch" pos="0 .005 -.17" size=".061 .17" zaxis="0 -1 34"/>
+            <body name="right_shin" pos="0 .01 -.403">
+              <site name="right_knee" class="force-torque" pos="0 0 .02"/>
+              <joint name="right_knee" pos="0 0 .02" axis="0 -1 0" range="-160 2"/>
+              <geom name="right_shin" fromto="0 0 0 0 0 -.3"  size=".049"/>
+              <site name="right_shin" class="touch" pos="0 0 -.15" size=".05 .15"/>
+              <body name="right_foot" pos="0 0 -.39">
+                <site name="right_ankle" class="force-torque"/>
+                <joint name="right_ankle_y" pos="0 0 .08" axis="0 1 0"   range="-50 50" stiffness="6"/>
+                <joint name="right_ankle_x" pos="0 0 .04" axis="1 0 .5" range="-50 50" stiffness="3"/>
+                <geom name="right_right_foot" fromto="-.07 -.02 0 .14 -.04 0" size=".027"/>
+                <geom name="left_right_foot" fromto="-.07 0 0 .14  .02 0" size=".027"/>
+                <site name="right_right_foot" class="touch" pos=".035 -.03 0" size=".03 .11" zaxis="21 -2 0"/>
+                <site name="left_right_foot" class="touch" pos=".035 .01 0" size=".03 .11" zaxis="21 2 0"/>
+              </body>
+            </body>
+          </body>
+          <body name="left_thigh" pos="0 .1 -.04">
+            <site name="left_hip" class="force-torque"/>
+            <joint name="left_hip_x" axis="-1 0 0" range="-25 5"  class="big_joint"/>
+            <joint name="left_hip_z" axis="0 0 -1" range="-60 35" class="big_joint"/>
+            <joint name="left_hip_y" axis="0 1 0" range="-120 20" class="big_stiff_joint"/>
+            <geom name="left_thigh" fromto="0 0 0 0 -.01 -.34" size=".06"/>
+            <site name="left_thigh" class="touch" pos="0 -.005 -.17" size=".061 .17" zaxis="0 1 34"/>
+            <body name="left_shin" pos="0 -.01 -.403">
+              <site name="left_knee" class="force-torque" pos="0 0 .02"/>
+              <joint name="left_knee" pos="0 0 .02" axis="0 -1 0" range="-160 2"/>
+              <geom name="left_shin" fromto="0 0 0 0 0 -.3"  size=".049"/>
+              <site name="left_shin" class="touch"  pos="0 0 -.15" size=".05 .15"/>
+              <body name="left_foot" pos="0 0 -.39">
+                <site name="left_ankle" class="force-torque"/>
+                <joint name="left_ankle_y" pos="0 0 .08" axis="0 1 0"   range="-50 50" stiffness="6"/>
+                <joint name="left_ankle_x" pos="0 0 .04" axis="1 0 .5" range="-50 50" stiffness="3"/>
+                <geom name="left_left_foot" fromto="-.07 .02 0 .14 .04 0" size=".027"/>
+                <geom name="right_left_foot" fromto="-.07 0 0 .14  -.02 0" size=".027"/>
+                <site name="right_left_foot" class="touch" pos=".035 -.01 0" size=".03 .11" zaxis="21 -2 0"/>
+                <site name="left_left_foot" class="touch" pos=".035 .03 0" size=".03 .11" zaxis="21 2 0"/>
+              </body>
+            </body>
+          </body>
+        </body>
+      </body>
+      <body name="right_upper_arm" pos="0 -.17 .06">
+        <joint name="right_shoulder1" axis="2 1 1"  range="-85 60"/>
+        <joint name="right_shoulder2" axis="0 -1 1" range="-85 60"/>
+        <geom name="right_upper_arm" fromto="0 0 0 .16 -.16 -.16" size=".04 .16"/>
+        <site name="right_upper_arm" class="touch" pos=".08 -.08 -.08" size=".041 .14" zaxis="1 -1 -1"/>
+        <body name="right_lower_arm" pos=".18 -.18 -.18">
+          <joint name="right_elbow" axis="0 -1 1" range="-90 50" stiffness="0"/>
+          <geom name="right_lower_arm" fromto=".01 .01 .01 .17 .17 .17" size=".031"/>
+          <site name="right_lower_arm" class="touch" pos=".09 .09 .09" size=".032 .14" zaxis="1 1 1"/>
+          <body name="right_hand" pos=".18 .18 .18">
+            <geom name="right_hand" type="sphere" size=".04"/>
+            <site name="right_hand" class="touch" type="sphere" size=".041"/>
+          </body>
+        </body>
+      </body>
+      <body name="left_upper_arm" pos="0 .17 .06">
+        <joint name="left_shoulder1" axis="2 -1 1" range="-60 85"/>
+        <joint name="left_shoulder2" axis="0 1 1" range="-60 85"/>
+        <geom name="left_upper_arm" fromto="0 0 0 .16 .16 -.16" size=".04 .16"/>
+        <site name="left_upper_arm" class="touch" pos=".08 .08 -.08" size=".041 .14" zaxis="1 1 -1"/>
+        <body name="left_lower_arm" pos=".18 .18 -.18">
+          <joint name="left_elbow" axis="0 -1 -1" range="-90 50" stiffness="0"/>
+          <geom name="left_lower_arm" fromto=".01 -.01 .01 .17 -.17 .17" size=".031"/>
+          <site name="left_lower_arm" class="touch" pos=".09 -.09 .09" size=".032 .14" zaxis="1 -1 1"/>
+          <body name="left_hand" pos=".18 -.18 .18">
+            <geom name="left_hand" type="sphere" size=".04"/>
+            <site name="left_hand" class="touch" type="sphere" size=".041"/>
+          </body>
+        </body>
+      </body>
+    </body>
+  </worldbody>
+
+  <actuator>
+    <motor name="abdomen_y"       gear="40"  joint="abdomen_y"/>
+    <motor name="abdomen_z"       gear="40"  joint="abdomen_z"/>
+    <motor name="abdomen_x"       gear="40"  joint="abdomen_x"/>
+    <motor name="right_hip_x"     gear="40"  joint="right_hip_x"/>
+    <motor name="right_hip_z"     gear="40"  joint="right_hip_z"/>
+    <motor name="right_hip_y"     gear="120" joint="right_hip_y"/>
+    <motor name="right_knee"      gear="80"  joint="right_knee"/>
+    <motor name="right_ankle_x"   gear="20"  joint="right_ankle_x"/>
+    <motor name="right_ankle_y"   gear="20"  joint="right_ankle_y"/>
+    <motor name="left_hip_x"      gear="40"  joint="left_hip_x"/>
+    <motor name="left_hip_z"      gear="40"  joint="left_hip_z"/>
+    <motor name="left_hip_y"      gear="120" joint="left_hip_y"/>
+    <motor name="left_knee"       gear="80"  joint="left_knee"/>
+    <motor name="left_ankle_x"    gear="20"  joint="left_ankle_x"/>
+    <motor name="left_ankle_y"    gear="20"  joint="left_ankle_y"/>
+    <motor name="right_shoulder1" gear="20"  joint="right_shoulder1"/>
+    <motor name="right_shoulder2" gear="20"  joint="right_shoulder2"/>
+    <motor name="right_elbow"     gear="40"  joint="right_elbow"/>
+    <motor name="left_shoulder1"  gear="20"  joint="left_shoulder1"/>
+    <motor name="left_shoulder2"  gear="20"  joint="left_shoulder2"/>
+    <motor name="left_elbow"      gear="40"  joint="left_elbow"/>
+  </actuator>
+
+  <sensor>
+    <subtreelinvel name="torso_subtreelinvel" body="torso"/>
+    <accelerometer name="torso_accel"    site="root"/>
+    <velocimeter name="torso_vel"        site="root"/>
+    <gyro name="torso_gyro"              site="root"/>
+
+    <force name="left_ankle_force"       site="left_ankle"/>
+    <force name="right_ankle_force"      site="right_ankle"/>
+    <force name="left_knee_force"        site="left_knee"/>
+    <force name="right_knee_force"       site="right_knee"/>
+    <force name="left_hip_force"         site="left_hip"/>
+    <force name="right_hip_force"        site="right_hip"/>
+
+    <torque name="left_ankle_torque"     site="left_ankle"/>
+    <torque name="right_ankle_torque"    site="right_ankle"/>
+    <torque name="left_knee_torque"      site="left_knee"/>
+    <torque name="right_knee_torque"     site="right_knee"/>
+    <torque name="left_hip_torque"       site="left_hip"/>
+    <torque name="right_hip_torque"      site="right_hip"/>
+
+    <touch name="torso_touch"            site="torso"/>
+    <touch name="head_touch"             site="head"/>
+    <touch name="lower_waist_touch"      site="lower_waist"/>
+    <touch name="butt_touch"             site="butt"/>
+    <touch name="right_thigh_touch"      site="right_thigh"/>
+    <touch name="right_shin_touch"       site="right_shin"/>
+    <touch name="right_right_foot_touch" site="right_right_foot"/>
+    <touch name="left_right_foot_touch"  site="left_right_foot"/>
+    <touch name="left_thigh_touch"       site="left_thigh"/>
+    <touch name="left_shin_touch"        site="left_shin"/>
+    <touch name="right_left_foot_touch"  site="right_left_foot"/>
+    <touch name="left_left_foot_touch"   site="left_left_foot"/>
+    <touch name="right_upper_arm_touch"  site="right_upper_arm"/>
+    <touch name="right_lower_arm_touch"  site="right_lower_arm"/>
+    <touch name="right_hand_touch"       site="right_hand"/>
+    <touch name="left_upper_arm_touch"   site="left_upper_arm"/>
+    <touch name="left_lower_arm_touch"   site="left_lower_arm"/>
+    <touch name="left_hand_touch"        site="left_hand"/>
+  </sensor>
+
+</mujoco>
+
--- a/DPI/local_dm_control_suite/humanoid_CMU.py
+++ b/DPI/local_dm_control_suite/humanoid_CMU.py
@ -0,0 +1,179 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Humanoid_CMU Domain."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+from dm_control import mujoco
+from dm_control.rl import control
+from local_dm_control_suite import base
+from local_dm_control_suite import common
+from dm_control.suite.utils import randomizers
+from dm_control.utils import containers
+from dm_control.utils import rewards
+import numpy as np
+
+_DEFAULT_TIME_LIMIT = 20
+_CONTROL_TIMESTEP = 0.02
+
+# Height of head above which stand reward is 1.
+_STAND_HEIGHT = 1.4
+
+# Horizontal speeds above which move reward is 1.
+_WALK_SPEED = 1
+_RUN_SPEED = 10
+
+SUITE = containers.TaggedTasks()
+
+
+def get_model_and_assets():
+  """Returns a tuple containing the model XML string and a dict of assets."""
+  return common.read_model('humanoid_CMU.xml'), common.ASSETS
+
+
+@SUITE.add()
+def stand(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Returns the Stand task."""
+  physics = Physics.from_xml_string(*get_model_and_assets())
+  task = HumanoidCMU(move_speed=0, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
+      **environment_kwargs)
+
+
+@SUITE.add()
+def run(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Returns the Run task."""
+  physics = Physics.from_xml_string(*get_model_and_assets())
+  task = HumanoidCMU(move_speed=_RUN_SPEED, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
+      **environment_kwargs)
+
+
+class Physics(mujoco.Physics):
+  """Physics simulation with additional features for the humanoid_CMU domain."""
+
+  def thorax_upright(self):
+    """Returns projection from y-axes of thorax to the z-axes of world."""
+    return self.named.data.xmat['thorax', 'zy']
+
+  def head_height(self):
+    """Returns the height of the head."""
+    return self.named.data.xpos['head', 'z']
+
+  def center_of_mass_position(self):
+    """Returns position of the center-of-mass."""
+    return self.named.data.subtree_com['thorax']
+
+  def center_of_mass_velocity(self):
+    """Returns the velocity of the center-of-mass."""
+    return self.named.data.sensordata['thorax_subtreelinvel'].copy()
+
+  def torso_vertical_orientation(self):
+    """Returns the z-projection of the thorax orientation matrix."""
+    return self.named.data.xmat['thorax', ['zx', 'zy', 'zz']]
+
+  def joint_angles(self):
+    """Returns the state without global orientation or position."""
+    return self.data.qpos[7:].copy()  # Skip the 7 DoFs of the free root joint.
+
+  def extremities(self):
+    """Returns end effector positions in egocentric frame."""
+    torso_frame = self.named.data.xmat['thorax'].reshape(3, 3)
+    torso_pos = self.named.data.xpos['thorax']
+    positions = []
+    for side in ('l', 'r'):
+      for limb in ('hand', 'foot'):
+        torso_to_limb = self.named.data.xpos[side + limb] - torso_pos
+        positions.append(torso_to_limb.dot(torso_frame))
+    return np.hstack(positions)
+
+
+class HumanoidCMU(base.Task):
+  """A task for the CMU Humanoid."""
+
+  def __init__(self, move_speed, random=None):
+    """Initializes an instance of `Humanoid_CMU`.
+
+    Args:
+      move_speed: A float. If this value is zero, reward is given simply for
+        standing up. Otherwise this specifies a target horizontal velocity for
+        the walking task.
+      random: Optional, either a `numpy.random.RandomState` instance, an
+        integer seed for creating a new `RandomState`, or None to select a seed
+        automatically (default).
+    """
+    self._move_speed = move_speed
+    super(HumanoidCMU, self).__init__(random=random)
+
+  def initialize_episode(self, physics):
+    """Sets a random collision-free configuration at the start of each episode.
+
+    Args:
+      physics: An instance of `Physics`.
+    """
+    penetrating = True
+    while penetrating:
+      randomizers.randomize_limited_and_rotational_joints(
+          physics, self.random)
+      # Check for collisions.
+      physics.after_reset()
+      penetrating = physics.data.ncon > 0
+    super(HumanoidCMU, self).initialize_episode(physics)
+
+  def get_observation(self, physics):
+    """Returns a set of egocentric features."""
+    obs = collections.OrderedDict()
+    obs['joint_angles'] = physics.joint_angles()
+    obs['head_height'] = physics.head_height()
+    obs['extremities'] = physics.extremities()
+    obs['torso_vertical'] = physics.torso_vertical_orientation()
+    obs['com_velocity'] = physics.center_of_mass_velocity()
+    obs['velocity'] = physics.velocity()
+    return obs
+
+  def get_reward(self, physics):
+    """Returns a reward to the agent."""
+    standing = rewards.tolerance(physics.head_height(),
+                                 bounds=(_STAND_HEIGHT, float('inf')),
+                                 margin=_STAND_HEIGHT/4)
+    upright = rewards.tolerance(physics.thorax_upright(),
+                                bounds=(0.9, float('inf')), sigmoid='linear',
+                                margin=1.9, value_at_margin=0)
+    stand_reward = standing * upright
+    small_control = rewards.tolerance(physics.control(), margin=1,
+                                      value_at_margin=0,
+                                      sigmoid='quadratic').mean()
+    small_control = (4 + small_control) / 5
+    if self._move_speed == 0:
+      horizontal_velocity = physics.center_of_mass_velocity()[[0, 1]]
+      dont_move = rewards.tolerance(horizontal_velocity, margin=2).mean()
+      return small_control * stand_reward * dont_move
+    else:
+      com_velocity = np.linalg.norm(physics.center_of_mass_velocity()[[0, 1]])
+      move = rewards.tolerance(com_velocity,
+                               bounds=(self._move_speed, float('inf')),
+                               margin=self._move_speed, value_at_margin=0,
+                               sigmoid='linear')
+      move = (5*move + 1) / 6
+      return small_control * stand_reward * move
--- a/DPI/local_dm_control_suite/humanoid_CMU.xml
+++ b/DPI/local_dm_control_suite/humanoid_CMU.xml
@ -0,0 +1,289 @@
+<mujoco model="humanoid_CMU">
+
+  <include file="./common/skybox.xml"/>
+  <include file="./common/visual.xml"/>
+  <include file="./common/materials.xml"/>
+
+  <statistic extent="2" center="0 0 1"/>
+
+  <default class="main">
+    <joint limited="true" solimplimit="0 0.99 0.01" stiffness="0.1" armature=".01" damping="1"/>
+    <geom friction="0.7" solref="0.015 1" solimp="0.95 0.99 0.003"/>
+    <motor ctrllimited="true" ctrlrange="-1 1"/>
+    <default class="humanoid">
+      <geom type="capsule" material="self"/>
+      <default class="stiff_low">
+        <joint stiffness=".5" damping="4"/>
+      </default>
+      <default class="stiff_medium">
+        <joint stiffness="10" damping="5"/>
+      </default>
+      <default class="stiff_high">
+        <joint stiffness="30" damping="10"/>
+      </default>
+      <default class="touch">
+        <site group="3" rgba="0 0 1 .5"/>
+      </default>
+    </default>
+  </default>
+
+  <worldbody>
+    <geom name="floor" type="plane" conaffinity="1" size="100 100 .2" material="grid"/>
+    <light name="tracking_light" pos="0 0 7" dir="0 0 -1" mode="trackcom"/>
+    <camera name="back" pos="0 3 2.4" xyaxes="-1 0 0 0 -1 2" mode="trackcom"/>
+    <camera name="side" pos="-3 0 2.4" xyaxes="0 -1 0 1 0 2" mode="trackcom"/>
+    <body name="root" childclass="humanoid" pos="0 0 1" euler="90 0 0">
+      <site name="root" size=".01" rgba="0.5 0.5 0.5 0"/>
+      <freejoint name="root"/>
+      <geom name="root_geom" size="0.09 0.06" pos="0 -0.05 0" quat="1 0 -1 0"/>
+      <body name="lhipjoint">
+        <geom name="lhipjoint" size="0.008 0.022" pos="0.051 -0.046 0.025" quat="0.5708 -0.566602 -0.594264 0"/>
+        <body name="lfemur" pos="0.102 -0.092 0.05" quat="1 0 0 0.17365">
+          <joint name="lfemurrz" axis="0 0 1" range="-60 70" class="stiff_medium"/>
+          <joint name="lfemurry" axis="0 1 0" range="-70 70" class="stiff_medium"/>
+          <joint name="lfemurrx" axis="1 0 0" range="-160 20" class="stiff_medium"/>
+          <geom name="lfemur" size="0.06 0.17" pos="-.01 -0.202473 0" quat="0.7 -0.7 -0.1228 -0.07"/>
+          <body name="ltibia" pos="0 -0.404945 0">
+            <joint name="ltibiarx" axis="1 0 0" range="1   170" class="stiff_low"/>
+            <geom name="ltibia" size="0.03 0.1825614" pos="0 -0.202846 0" quat="0.7 -0.7 -0.1228 -0.1228"/>
+            <geom name="lcalf" size="0.045 0.08" pos="0 -0.1 -.01" quat="0.7 -0.7 -0.1228 -0.1228"/>
+            <body name="lfoot" pos="0 -0.405693 0" quat="0.707107 -0.707107 0 0">
+              <site name="lfoot_touch" type="box" pos="-.005 -.02 -0.025" size=".04 .08 .02" euler="10 0 0" class="touch"/>
+              <joint name="lfootrz" axis="0 0 1" range="-70 20" class="stiff_medium"/>
+              <joint name="lfootrx" axis="1 0 0" range="-45    90" class="stiff_medium"/>
+              <geom name="lfoot0" size="0.02 0.06" pos="-0.02 -0.023 -0.01" euler="100 -2 0"/>
+              <geom name="lfoot1" size="0.02 0.06" pos="0 -0.023 -0.01" euler="100 0 0"/>
+              <geom name="lfoot2" size="0.02 0.06" pos=".01 -0.023 -0.01" euler="100 10 0"/>
+              <body name="ltoes" pos="0 -0.106372 -0.0227756">
+                <joint name="ltoesrx" axis="1 0 0" range="-90 20"/>
+                <geom name="ltoes0" type="sphere" size="0.02" pos="-.025 -0.01 -.01"/>
+                <geom name="ltoes1" type="sphere" size="0.02" pos="0 -0.005 -.01"/>
+                <geom name="ltoes2" type="sphere" size="0.02" pos=".02 .001 -.01"/>
+                <site name="ltoes_touch" type="capsule" pos="-.005 -.005 -.01" size="0.025 0.02" zaxis="1 .2 0" class="touch"/>
+              </body>
+            </body>
+          </body>
+        </body>
+      </body>
+      <body name="rhipjoint">
+        <geom name="rhipjoint" size="0.008 0.022" pos="-0.051 -0.046 0.025" quat="0.574856 -0.547594 0.608014 0"/>
+        <body name="rfemur" pos="-0.102 -0.092 0.05" quat="1 0 0 -0.17365">
+          <joint name="rfemurrz" axis="0 0 1" range="-70 60" class="stiff_medium"/>
+          <joint name="rfemurry" axis="0 1 0" range="-70 70" class="stiff_medium"/>
+          <joint name="rfemurrx" axis="1 0 0" range="-160 20" class="stiff_medium"/>
+          <geom name="rfemur" size="0.06 0.17" pos=".01 -0.202473 0" quat="0.7 -0.7 0.1228 0.07"/>
+          <body name="rtibia" pos="0 -0.404945 0">
+            <joint name="rtibiarx" axis="1 0 0" range="1   170" class="stiff_low"/>
+            <geom name="rtibia" size="0.03 0.1825614" pos="0 -0.202846 0" quat="0.7 -0.7 0.1228 0.1228"/>
+            <geom name="rcalf" size="0.045 0.08" pos="0 -0.1 -.01" quat="0.7 -0.7 -0.1228 -0.1228"/>
+            <body name="rfoot" pos="0 -0.405693 0" quat="0.707107 -0.707107 0 0">
+              <site name="rfoot_touch" type="box" pos=".005 -.02 -0.025" size=".04 .08 .02" euler="10 0 0" class="touch"/>
+              <joint name="rfootrz" axis="0 0 1" range="-20 70" class="stiff_medium"/>
+              <joint name="rfootrx" axis="1 0 0" range="-45    90" class="stiff_medium"/>
+              <geom name="rfoot0" size="0.02 0.06" pos="0.02 -0.023 -0.01" euler="100 2 0"/>
+              <geom name="rfoot1" size="0.02 0.06" pos="0 -0.023 -0.01" euler="100 0 0"/>
+              <geom name="rfoot2" size="0.02 0.06" pos="-.01 -0.023 -0.01" euler="100 -10 0"/>
+              <body name="rtoes" pos="0 -0.106372 -0.0227756">
+                <joint name="rtoesrx" axis="1 0 0" range="-90 20"/>
+                <geom name="rtoes0" type="sphere" size="0.02" pos=".025 -0.01 -.01"/>
+                <geom name="rtoes1" type="sphere" size="0.02" pos="0 -0.005 -.01"/>
+                <geom name="rtoes2" type="sphere" size="0.02" pos="-.02 .001 -.01"/>
+                <site name="rtoes_touch" type="capsule" pos=".005 -.005 -.01" size="0.025 0.02" zaxis="1 -.2 0" class="touch"/>
+              </body>
+            </body>
+          </body>
+        </body>
+      </body>
+      <body name="lowerback">
+        <joint name="lowerbackrz" axis="0 0 1" range="-30 30" class="stiff_high"/>
+        <joint name="lowerbackry" axis="0 1 0" range="-30 30" class="stiff_high"/>
+        <joint name="lowerbackrx" axis="1 0 0" range="-20 45" class="stiff_high"/>
+        <geom name="lowerback" size="0.065 0.055" pos="0 0.056 .03" quat="1 0 1 0"/>
+        <body name="upperback" pos="0 0.1 -0.01">
+          <joint name="upperbackrz" axis="0 0 1" range="-30 30" class="stiff_high"/>
+          <joint name="upperbackry" axis="0 1 0" range="-30 30" class="stiff_high"/>
+          <joint name="upperbackrx" axis="1 0 0" range="-20 45" class="stiff_high"/>
+          <geom name="upperback" size="0.06 0.06" pos="0 0.06 0.02" quat="1 0 1 0"/>
+          <body name="thorax" pos="0.000512528 0.11356 0.000936821">
+            <joint name="thoraxrz" axis="0 0 1" range="-30 30" class="stiff_high"/>
+            <joint name="thoraxry" axis="0 1 0" range="-30 30" class="stiff_high"/>
+            <joint name="thoraxrx" axis="1 0 0" range="-20 45" class="stiff_high"/>
+            <geom name="thorax" size="0.08 0.07" pos="0 0.05 0" quat="1 0 1 0"/>
+            <body name="lowerneck" pos="0 0.113945 0.00468037">
+              <joint name="lowerneckrz" axis="0 0 1" range="-30 30" class="stiff_medium"/>
+              <joint name="lowerneckry" axis="0 1 0" range="-30 30" class="stiff_medium"/>
+              <joint name="lowerneckrx" axis="1 0 0" range="-20 45" class="stiff_medium"/>
+              <geom name="lowerneck" size="0.08 0.02" pos="0 0.04 -.02" quat="1 1 0 0"/>
+              <body name="upperneck" pos="0 0.09 0.01">
+                <joint name="upperneckrz" axis="0 0 1" range="-30 30" class="stiff_medium"/>
+                <joint name="upperneckry" axis="0 1 0" range="-30 30" class="stiff_medium"/>
+                <joint name="upperneckrx" axis="1 0 0" range="-20 45" class="stiff_medium"/>
+                <geom name="upperneck" size="0.05 0.03" pos="0 0.05 0" quat=".8 1 0 0"/>
+                <body name="head" pos="0 0.09 0">
+                  <camera name="egocentric" pos="0 0 0" xyaxes="-1 0 0 0 1 0" fovy="80"/>
+                  <joint name="headrz" axis="0 0 1" range="-30 30" class="stiff_medium"/>
+                  <joint name="headry" axis="0 1 0" range="-30 30" class="stiff_medium"/>
+                  <joint name="headrx" axis="1 0 0" range="-20 45" class="stiff_medium"/>
+                  <geom name="head" size="0.085 0.035" pos="0 0.11 0.03" quat="1 .9 0 0"/>
+                  <geom name="leye" type="sphere" size="0.02" pos=" .03 0.11 0.1"/>
+                  <geom name="reye" type="sphere" size="0.02" pos="-.03 0.11 0.1"/>
+                </body>
+              </body>
+            </body>
+            <body name="lclavicle" pos="0 0.113945 0.00468037">
+              <joint name="lclaviclerz" axis="0 0 1" range="0 20" class="stiff_high"/>
+              <joint name="lclaviclery" axis="0 1 0" range="-20 10" class="stiff_high"/>
+              <geom name="lclavicle" size="0.08 0.04" pos="0.09 0.05 -.01" quat="1 0 -1 -.4"/>
+              <body name="lhumerus" pos="0.183 0.076 0.01" quat="0.18 0.68 -0.68 0.18">
+                <joint name="lhumerusrz" axis="0 0 1" range="-90 90" class="stiff_low"/>
+                <joint name="lhumerusry" axis="0 1 0" range="-90 90" class="stiff_low"/>
+                <joint name="lhumerusrx" axis="1 0 0" range="-60 90" class="stiff_low"/>
+                <geom name="lhumerus" size="0.035 0.124" pos="0 -0.138 0" quat="0.612 -0.612 0.35 0.35"/>
+                <body name="lradius" pos="0 -0.277 0">
+                  <joint name="lradiusrx" axis="1 0 0" range="-10 170" class="stiff_low"/>
+                  <geom name="lradius" size="0.03 0.06" pos="0 -0.08 0" quat="0.612 -0.612 0.35 0.35"/>
+                  <body name="lwrist" pos="0 -0.17 0" quat="-0.5 0 0.866 0">
+                    <joint name="lwristry" axis="0 1 0" range="-180 0"/>
+                    <geom name="lwrist" size="0.025 0.03" pos="0 -0.02 0" quat="0 0 -1 -1"/>
+                    <body name="lhand" pos="0 -0.08 0">
+                      <joint name="lhandrz" axis="0 0 1" range="-45 45"/>
+                      <joint name="lhandrx" axis="1 0 0" range="-90 90"/>
+                      <geom name="lhand" type="ellipsoid" size=".048 0.02 0.06" pos="0 -0.047 0" quat="0 0 -1 -1"/>
+                      <body name="lfingers" pos="0 -0.08 0">
+                        <joint name="lfingersrx" axis="1 0 0" range="0 90"/>
+                        <geom name="lfinger0" size="0.01 0.04" pos="-.03 -0.05 0" quat="1 -1 0 0" />
+                        <geom name="lfinger1" size="0.01 0.04" pos="-.008 -0.06 0" quat="1 -1 0 0" />
+                        <geom name="lfinger2" size="0.009 0.04" pos=".014 -0.06 0" quat="1 -1 0 0" />
+                        <geom name="lfinger3" size="0.008 0.04" pos=".032 -0.05 0" quat="1 -1 0 0" />
+                      </body>
+                      <body name="lthumb" pos="-.02 -.03 0" quat="0.92388 0 0 -0.382683">
+                        <joint name="lthumbrz" axis="0 0 1" range="-45 45"/>
+                        <joint name="lthumbrx" axis="1 0 0" range="0 90"/>
+                        <geom name="lthumb" size="0.012 0.04" pos="0 -0.06 0" quat="0 0 -1 -1"/>
+                      </body>
+                    </body>
+                  </body>
+                </body>
+              </body>
+            </body>
+            <body name="rclavicle" pos="0 0.113945 0.00468037">
+              <joint name="rclaviclerz" axis="0 0 1" range="-20 0" class="stiff_high"/>
+              <joint name="rclaviclery" axis="0 1 0" range="-10 20" class="stiff_high"/>
+              <geom name="rclavicle" size="0.08 0.04" pos="-.09 0.05 -.01" quat="1 0 -1 .4"/>
+              <body name="rhumerus" pos="-0.183 0.076 0.01" quat="0.18 0.68 0.68 -0.18">
+                <joint name="rhumerusrz" axis="0 0 1" range="-90 90" class="stiff_low"/>
+                <joint name="rhumerusry" axis="0 1 0" range="-90 90" class="stiff_low"/>
+                <joint name="rhumerusrx" axis="1 0 0" range="-60 90" class="stiff_low"/>
+                <geom name="rhumerus" size="0.035 0.124" pos="0 -0.138 0" quat="0.61 -0.61 -0.35 -0.35"/>
+                <body name="rradius" pos="0 -0.277 0">
+                  <joint name="rradiusrx" axis="1 0 0" range="-10 170" class="stiff_low"/>
+                  <geom name="rradius" size="0.03 0.06" pos="0 -0.08 0" quat="0.612 -0.612 -0.35 -0.35"/>
+                  <body name="rwrist" pos="0 -0.17 0" quat="-0.5 0 -0.866 0">
+                    <joint name="rwristry" axis="0 1 0" range="-180 0"/>
+                    <geom name="rwrist" size="0.025 0.03" pos="0 -0.02 0" quat="0 0 1 1"/>
+                    <body name="rhand" pos="0 -0.08 0">
+                      <joint name="rhandrz" axis="0 0 1" range="-45 45"/>
+                      <joint name="rhandrx" axis="1 0 0" range="-90 90"/>
+                      <geom name="rhand" type="ellipsoid" size=".048 0.02 .06" pos="0 -0.047 0" quat="0 0 1 1"/>
+                      <body name="rfingers" pos="0 -0.08 0">
+                        <joint name="rfingersrx" axis="1 0 0" range="0 90"/>
+                        <geom name="rfinger0" size="0.01 0.04" pos=".03 -0.05 0" quat="1 -1  0 0" />
+                        <geom name="rfinger1" size="0.01 0.04" pos=".008 -0.06 0" quat="1 -1  0 0" />
+                        <geom name="rfinger2" size="0.009 0.04" pos="-.014 -0.06 0" quat="1 -1  0 0" />
+                        <geom name="rfinger3" size="0.008 0.04" pos="-.032 -0.05 0" quat="1 -1  0 0" />
+                      </body>
+                      <body name="rthumb" pos=".02 -.03 0" quat="0.92388 0 0 0.382683">
+                        <joint name="rthumbrz" axis="0 0 1" range="-45    45"/>
+                        <joint name="rthumbrx" axis="1 0 0" range="0 90"/>
+                        <geom name="rthumb" size="0.012 0.04" pos="0 -0.06 0" quat="0 0 1 1"/>
+                      </body>
+                    </body>
+                  </body>
+                </body>
+              </body>
+            </body>
+          </body>
+        </body>
+      </body>
+    </body>
+  </worldbody>
+
+  <contact>
+    <exclude body1="lclavicle" body2="rclavicle"/>
+    <exclude body1="lowerneck" body2="lclavicle"/>
+    <exclude body1="lowerneck" body2="rclavicle"/>
+    <exclude body1="upperneck" body2="lclavicle"/>
+    <exclude body1="upperneck" body2="rclavicle"/>
+  </contact>
+
+  <actuator>
+    <motor name="headrx" joint="headrx" gear="20"/>
+    <motor name="headry" joint="headry" gear="20"/>
+    <motor name="headrz" joint="headrz" gear="20"/>
+    <motor name="lclaviclery" joint="lclaviclery" gear="20"/>
+    <motor name="lclaviclerz" joint="lclaviclerz" gear="20"/>
+    <motor name="lfemurrx" joint="lfemurrx" gear="120"/>
+    <motor name="lfemurry" joint="lfemurry" gear="40"/>
+    <motor name="lfemurrz" joint="lfemurrz" gear="40"/>
+    <motor name="lfingersrx" joint="lfingersrx" gear="20"/>
+    <motor name="lfootrx" joint="lfootrx" gear="20"/>
+    <motor name="lfootrz" joint="lfootrz" gear="20"/>
+    <motor name="lhandrx" joint="lhandrx" gear="20"/>
+    <motor name="lhandrz" joint="lhandrz" gear="20"/>
+    <motor name="lhumerusrx" joint="lhumerusrx" gear="40"/>
+    <motor name="lhumerusry" joint="lhumerusry" gear="40"/>
+    <motor name="lhumerusrz" joint="lhumerusrz" gear="40"/>
+    <motor name="lowerbackrx" joint="lowerbackrx" gear="40"/>
+    <motor name="lowerbackry" joint="lowerbackry" gear="40"/>
+    <motor name="lowerbackrz" joint="lowerbackrz" gear="40"/>
+    <motor name="lowerneckrx" joint="lowerneckrx" gear="20"/>
+    <motor name="lowerneckry" joint="lowerneckry" gear="20"/>
+    <motor name="lowerneckrz" joint="lowerneckrz" gear="20"/>
+    <motor name="lradiusrx" joint="lradiusrx" gear="40"/>
+    <motor name="lthumbrx" joint="lthumbrx" gear="20"/>
+    <motor name="lthumbrz" joint="lthumbrz" gear="20"/>
+    <motor name="ltibiarx" joint="ltibiarx" gear="80"/>
+    <motor name="ltoesrx" joint="ltoesrx" gear="20"/>
+    <motor name="lwristry" joint="lwristry" gear="20"/>
+    <motor name="rclaviclery" joint="rclaviclery" gear="20"/>
+    <motor name="rclaviclerz" joint="rclaviclerz" gear="20"/>
+    <motor name="rfemurrx" joint="rfemurrx" gear="120"/>
+    <motor name="rfemurry" joint="rfemurry" gear="40"/>
+    <motor name="rfemurrz" joint="rfemurrz" gear="40"/>
+    <motor name="rfingersrx" joint="rfingersrx" gear="20"/>
+    <motor name="rfootrx" joint="rfootrx" gear="20"/>
+    <motor name="rfootrz" joint="rfootrz" gear="20"/>
+    <motor name="rhandrx" joint="rhandrx" gear="20"/>
+    <motor name="rhandrz" joint="rhandrz" gear="20"/>
+    <motor name="rhumerusrx" joint="rhumerusrx" gear="40"/>
+    <motor name="rhumerusry" joint="rhumerusry" gear="40"/>
+    <motor name="rhumerusrz" joint="rhumerusrz" gear="40"/>
+    <motor name="rradiusrx" joint="rradiusrx" gear="40"/>
+    <motor name="rthumbrx" joint="rthumbrx" gear="20"/>
+    <motor name="rthumbrz" joint="rthumbrz" gear="20"/>
+    <motor name="rtibiarx" joint="rtibiarx" gear="80"/>
+    <motor name="rtoesrx" joint="rtoesrx" gear="20"/>
+    <motor name="rwristry" joint="rwristry" gear="20"/>
+    <motor name="thoraxrx" joint="thoraxrx" gear="40"/>
+    <motor name="thoraxry" joint="thoraxry" gear="40"/>
+    <motor name="thoraxrz" joint="thoraxrz" gear="40"/>
+    <motor name="upperbackrx" joint="upperbackrx" gear="40"/>
+    <motor name="upperbackry" joint="upperbackry" gear="40"/>
+    <motor name="upperbackrz" joint="upperbackrz" gear="40"/>
+    <motor name="upperneckrx" joint="upperneckrx" gear="20"/>
+    <motor name="upperneckry" joint="upperneckry" gear="20"/>
+    <motor name="upperneckrz" joint="upperneckrz" gear="20"/>
+  </actuator>
+
+  <sensor>
+    <subtreelinvel name="thorax_subtreelinvel" body="thorax"/>
+    <velocimeter name="sensor_root_veloc" site="root"/>
+    <gyro name="sensor_root_gyro" site="root"/>
+    <accelerometer name="sensor_root_accel" site="root"/>
+    <touch name="sensor_touch_ltoes" site="ltoes_touch"/>
+    <touch name="sensor_touch_rtoes" site="rtoes_touch"/>
+    <touch name="sensor_touch_rfoot" site="rfoot_touch"/>
+    <touch name="sensor_touch_lfoot" site="lfoot_touch"/>
+  </sensor>
+
+</mujoco>
--- a/DPI/local_dm_control_suite/lqr.py
+++ b/DPI/local_dm_control_suite/lqr.py
@ -0,0 +1,272 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Procedurally generated LQR domain."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import os
+
+from dm_control import mujoco
+from dm_control.rl import control
+from local_dm_control_suite import base
+from local_dm_control_suite import common
+from dm_control.utils import containers
+from dm_control.utils import xml_tools
+from lxml import etree
+import numpy as np
+from six.moves import range
+
+from dm_control.utils import io as resources
+
+_DEFAULT_TIME_LIMIT = float('inf')
+_CONTROL_COST_COEF = 0.1
+SUITE = containers.TaggedTasks()
+
+
+def get_model_and_assets(n_bodies, n_actuators, random):
+  """Returns the model description as an XML string and a dict of assets.
+
+  Args:
+    n_bodies: An int, number of bodies of the LQR.
+    n_actuators: An int, number of actuated bodies of the LQR. `n_actuators`
+      should be less or equal than `n_bodies`.
+    random: A `numpy.random.RandomState` instance.
+
+  Returns:
+    A tuple `(model_xml_string, assets)`, where `assets` is a dict consisting of
+    `{filename: contents_string}` pairs.
+  """
+  return _make_model(n_bodies, n_actuators, random), common.ASSETS
+
+
+@SUITE.add()
+def lqr_2_1(time_limit=_DEFAULT_TIME_LIMIT, random=None,
+            environment_kwargs=None):
+  """Returns an LQR environment with 2 bodies of which the first is actuated."""
+  return _make_lqr(n_bodies=2,
+                   n_actuators=1,
+                   control_cost_coef=_CONTROL_COST_COEF,
+                   time_limit=time_limit,
+                   random=random,
+                   environment_kwargs=environment_kwargs)
+
+
+@SUITE.add()
+def lqr_6_2(time_limit=_DEFAULT_TIME_LIMIT, random=None,
+            environment_kwargs=None):
+  """Returns an LQR environment with 6 bodies of which first 2 are actuated."""
+  return _make_lqr(n_bodies=6,
+                   n_actuators=2,
+                   control_cost_coef=_CONTROL_COST_COEF,
+                   time_limit=time_limit,
+                   random=random,
+                   environment_kwargs=environment_kwargs)
+
+
+def _make_lqr(n_bodies, n_actuators, control_cost_coef, time_limit, random,
+              environment_kwargs):
+  """Returns a LQR environment.
+
+  Args:
+    n_bodies: An int, number of bodies of the LQR.
+    n_actuators: An int, number of actuated bodies of the LQR. `n_actuators`
+      should be less or equal than `n_bodies`.
+    control_cost_coef: A number, the coefficient of the control cost.
+    time_limit: An int, maximum time for each episode in seconds.
+    random: Either an existing `numpy.random.RandomState` instance, an
+      integer seed for creating a new `RandomState`, or None to select a seed
+      automatically.
+    environment_kwargs: A `dict` specifying keyword arguments for the
+      environment, or None.
+
+  Returns:
+    A LQR environment with `n_bodies` bodies of which first `n_actuators` are
+    actuated.
+  """
+
+  if not isinstance(random, np.random.RandomState):
+    random = np.random.RandomState(random)
+
+  model_string, assets = get_model_and_assets(n_bodies, n_actuators,
+                                              random=random)
+  physics = Physics.from_xml_string(model_string, assets=assets)
+  task = LQRLevel(control_cost_coef, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(physics, task, time_limit=time_limit,
+                             **environment_kwargs)
+
+
+def _make_body(body_id, stiffness_range, damping_range, random):
+  """Returns an `etree.Element` defining a body.
+
+  Args:
+    body_id: Id of the created body.
+    stiffness_range: A tuple of (stiffness_lower_bound, stiffness_uppder_bound).
+      The stiffness of the joint is drawn uniformly from this range.
+    damping_range: A tuple of (damping_lower_bound, damping_upper_bound). The
+      damping of the joint is drawn uniformly from this range.
+    random: A `numpy.random.RandomState` instance.
+
+  Returns:
+   A new instance of `etree.Element`. A body element with two children: joint
+   and geom.
+  """
+  body_name = 'body_{}'.format(body_id)
+  joint_name = 'joint_{}'.format(body_id)
+  geom_name = 'geom_{}'.format(body_id)
+
+  body = etree.Element('body', name=body_name)
+  body.set('pos', '.25 0 0')
+  joint = etree.SubElement(body, 'joint', name=joint_name)
+  body.append(etree.Element('geom', name=geom_name))
+  joint.set('stiffness',
+            str(random.uniform(stiffness_range[0], stiffness_range[1])))
+  joint.set('damping',
+            str(random.uniform(damping_range[0], damping_range[1])))
+  return body
+
+
+def _make_model(n_bodies,
+                n_actuators,
+                random,
+                stiffness_range=(15, 25),
+                damping_range=(0, 0)):
+  """Returns an MJCF XML string defining a model of springs and dampers.
+
+  Args:
+    n_bodies: An integer, the number of bodies (DoFs) in the system.
+    n_actuators: An integer, the number of actuated bodies.
+    random: A `numpy.random.RandomState` instance.
+    stiffness_range: A tuple containing minimum and maximum stiffness. Each
+      joint's stiffness is sampled uniformly from this interval.
+    damping_range: A tuple containing minimum and maximum damping. Each joint's
+      damping is sampled uniformly from this interval.
+
+  Returns:
+    An MJCF string describing the linear system.
+
+  Raises:
+    ValueError: If the number of bodies or actuators is erronous.
+  """
+  if n_bodies < 1 or n_actuators < 1:
+    raise ValueError('At least 1 body and 1 actuator required.')
+  if n_actuators > n_bodies:
+    raise ValueError('At most 1 actuator per body.')
+
+  file_path = os.path.join(os.path.dirname(__file__), 'lqr.xml')
+  with resources.GetResourceAsFile(file_path) as xml_file:
+    mjcf = xml_tools.parse(xml_file)
+  parent = mjcf.find('./worldbody')
+  actuator = etree.SubElement(mjcf.getroot(), 'actuator')
+  tendon = etree.SubElement(mjcf.getroot(), 'tendon')
+
+  for body in range(n_bodies):
+    # Inserting body.
+    child = _make_body(body, stiffness_range, damping_range, random)
+    site_name = 'site_{}'.format(body)
+    child.append(etree.Element('site', name=site_name))
+
+    if body == 0:
+      child.set('pos', '.25 0 .1')
+    # Add actuators to the first n_actuators bodies.
+    if body < n_actuators:
+      # Adding actuator.
+      joint_name = 'joint_{}'.format(body)
+      motor_name = 'motor_{}'.format(body)
+      child.find('joint').set('name', joint_name)
+      actuator.append(etree.Element('motor', name=motor_name, joint=joint_name))
+
+    # Add a tendon between consecutive bodies (for visualisation purposes only).
+    if body < n_bodies - 1:
+      child_site_name = 'site_{}'.format(body + 1)
+      tendon_name = 'tendon_{}'.format(body)
+      spatial = etree.SubElement(tendon, 'spatial', name=tendon_name)
+      spatial.append(etree.Element('site', site=site_name))
+      spatial.append(etree.Element('site', site=child_site_name))
+    parent.append(child)
+    parent = child
+
+  return etree.tostring(mjcf, pretty_print=True)
+
+
+class Physics(mujoco.Physics):
+  """Physics simulation with additional features for the LQR domain."""
+
+  def state_norm(self):
+    """Returns the norm of the physics state."""
+    return np.linalg.norm(self.state())
+
+
+class LQRLevel(base.Task):
+  """A Linear Quadratic Regulator `Task`."""
+
+  _TERMINAL_TOL = 1e-6
+
+  def __init__(self, control_cost_coef, random=None):
+    """Initializes an LQR level with cost = sum(states^2) + c*sum(controls^2).
+
+    Args:
+      control_cost_coef: The coefficient of the control cost.
+      random: Optional, either a `numpy.random.RandomState` instance, an
+        integer seed for creating a new `RandomState`, or None to select a seed
+        automatically (default).
+
+    Raises:
+      ValueError: If the control cost coefficient is not positive.
+    """
+    if control_cost_coef <= 0:
+      raise ValueError('control_cost_coef must be positive.')
+
+    self._control_cost_coef = control_cost_coef
+    super(LQRLevel, self).__init__(random=random)
+
+  @property
+  def control_cost_coef(self):
+    return self._control_cost_coef
+
+  def initialize_episode(self, physics):
+    """Random state sampled from a unit sphere."""
+    ndof = physics.model.nq
+    unit = self.random.randn(ndof)
+    physics.data.qpos[:] = np.sqrt(2) * unit / np.linalg.norm(unit)
+    super(LQRLevel, self).initialize_episode(physics)
+
+  def get_observation(self, physics):
+    """Returns an observation of the state."""
+    obs = collections.OrderedDict()
+    obs['position'] = physics.position()
+    obs['velocity'] = physics.velocity()
+    return obs
+
+  def get_reward(self, physics):
+    """Returns a quadratic state and control reward."""
+    position = physics.position()
+    state_cost = 0.5 * np.dot(position, position)
+    control_signal = physics.control()
+    control_l2_norm = 0.5 * np.dot(control_signal, control_signal)
+    return 1 - (state_cost + control_l2_norm * self._control_cost_coef)
+
+  def get_evaluation(self, physics):
+    """Returns a sparse evaluation reward that is not used for learning."""
+    return float(physics.state_norm() <= 0.01)
+
+  def get_termination(self, physics):
+    """Terminates when the state norm is smaller than epsilon."""
+    if physics.state_norm() < self._TERMINAL_TOL:
+      return 0.0
--- a/DPI/local_dm_control_suite/lqr.xml
+++ b/DPI/local_dm_control_suite/lqr.xml
@ -0,0 +1,26 @@
+<mujoco model="LQR">
+  <include file="./common/skybox.xml"/>
+  <include file="./common/visual.xml"/>
+  <include file="./common/materials.xml"/>
+
+  <option timestep=".03"/>
+
+  <default>
+    <joint type="slide" axis="0 1 0"/>
+    <geom type="sphere" size=".1" material="self"/>
+    <site size=".01"/>
+    <tendon width=".02" material="self"/>
+  </default>
+
+  <option>
+    <flag constraint="disable"/>
+  </option>
+
+  <worldbody>
+    <light name="light" pos="0 0 2"/>
+    <camera name="cam0" pos="-1.428 -0.311 0.856" xyaxes="0.099 -0.995 0.000 0.350 0.035 0.936"/>
+    <camera name="cam1" pos="1.787 2.452 4.331" xyaxes="-1 0 0 0 -0.868 0.497"/>
+    <geom name="floor" size="4 1 .2" type="plane" material="grid"/>
+    <geom name="origin" pos="2 0 .05" size="2 .003 .05" type="box" rgba=".5 .5 .5 .5"/>
+  </worldbody>
+</mujoco>
--- a/DPI/local_dm_control_suite/lqr_solver.py
+++ b/DPI/local_dm_control_suite/lqr_solver.py
@ -0,0 +1,142 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+r"""Optimal policy for LQR levels.
+
+LQR control problem is described in
+https://en.wikipedia.org/wiki/Linear-quadratic_regulator#Infinite-horizon.2C_discrete-time_LQR
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl import logging
+from dm_control.mujoco import wrapper
+import numpy as np
+from six.moves import range
+
+try:
+  import scipy.linalg as sp  # pylint: disable=g-import-not-at-top
+except ImportError:
+  sp = None
+
+
+def _solve_dare(a, b, q, r):
+  """Solves the Discrete-time Algebraic Riccati Equation (DARE) by iteration.
+
+  Algebraic Riccati Equation:
+  ```none
+  P_{t-1} = Q + A' * P_{t} * A -
+            A' * P_{t} * B * (R + B' * P_{t} * B)^{-1} * B' * P_{t} * A
+  ```
+
+  Args:
+    a: A 2 dimensional numpy array, transition matrix A.
+    b: A 2 dimensional numpy array, control matrix B.
+    q: A 2 dimensional numpy array, symmetric positive definite cost matrix.
+    r: A 2 dimensional numpy array, symmetric positive definite cost matrix
+
+  Returns:
+    A numpy array, a real symmetric matrix P which is the solution to DARE.
+
+  Raises:
+    RuntimeError: If the computed P matrix is not symmetric and
+      positive-definite.
+  """
+  p = np.eye(len(a))
+  for _ in range(1000000):
+    a_p = a.T.dot(p)  # A' * P_t
+    a_p_b = np.dot(a_p, b)  # A' * P_t * B
+    # Algebraic Riccati Equation.
+    p_next = q + np.dot(a_p, a) - a_p_b.dot(
+        np.linalg.solve(b.T.dot(p.dot(b)) + r, a_p_b.T))
+    p_next += p_next.T
+    p_next *= .5
+    if np.abs(p - p_next).max() < 1e-12:
+      break
+    p = p_next
+  else:
+    logging.warning('DARE solver did not converge')
+  try:
+    # Check that the result is symmetric and positive-definite.
+    np.linalg.cholesky(p_next)
+  except np.linalg.LinAlgError:
+    raise RuntimeError('ARE solver failed: P matrix is not symmetric and '
+                       'positive-definite.')
+  return p_next
+
+
+def solve(env):
+  """Returns the optimal value and policy for LQR problem.
+
+  Args:
+    env: An instance of `control.EnvironmentV2` with LQR level.
+
+  Returns:
+    p: A numpy array, the Hessian of the optimal total cost-to-go (value
+      function at state x) is V(x) = .5 * x' * p * x.
+    k: A numpy array which gives the optimal linear policy u = k * x.
+    beta: The maximum eigenvalue of (a + b * k). Under optimal policy, at
+      timestep n the state tends to 0 like beta^n.
+
+  Raises:
+    RuntimeError: If the controlled system is unstable.
+  """
+  n = env.physics.model.nq  # number of DoFs
+  m = env.physics.model.nu  # number of controls
+
+  # Compute the mass matrix.
+  mass = np.zeros((n, n))
+  wrapper.mjbindings.mjlib.mj_fullM(env.physics.model.ptr, mass,
+                                    env.physics.data.qM)
+
+  # Compute input matrices a, b, q and r to the DARE solvers.
+  # State transition matrix a.
+  stiffness = np.diag(env.physics.model.jnt_stiffness.ravel())
+  damping = np.diag(env.physics.model.dof_damping.ravel())
+  dt = env.physics.model.opt.timestep
+
+  j = np.linalg.solve(-mass, np.hstack((stiffness, damping)))
+  a = np.eye(2 * n) + dt * np.vstack(
+      (dt * j + np.hstack((np.zeros((n, n)), np.eye(n))), j))
+
+  # Control transition matrix b.
+  b = env.physics.data.actuator_moment.T
+  bc = np.linalg.solve(mass, b)
+  b = dt * np.vstack((dt * bc, bc))
+
+  # State cost Hessian q.
+  q = np.diag(np.hstack([np.ones(n), np.zeros(n)]))
+
+  # Control cost Hessian r.
+  r = env.task.control_cost_coef * np.eye(m)
+
+  if sp:
+    # Use scipy's faster DARE solver if available.
+    solve_dare = sp.solve_discrete_are
+  else:
+    # Otherwise fall back on a slower internal implementation.
+    solve_dare = _solve_dare
+
+  # Solve the discrete algebraic Riccati equation.
+  p = solve_dare(a, b, q, r)
+  k = -np.linalg.solve(b.T.dot(p.dot(b)) + r, b.T.dot(p.dot(a)))
+
+  # Under optimal policy, state tends to 0 like beta^n_timesteps
+  beta = np.abs(np.linalg.eigvals(a + b.dot(k))).max()
+  if beta >= 1.0:
+    raise RuntimeError('Controlled system is unstable.')
+  return p, k, beta
--- a/DPI/local_dm_control_suite/manipulator.py
+++ b/DPI/local_dm_control_suite/manipulator.py
@ -0,0 +1,290 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Planar Manipulator domain."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+from dm_control import mujoco
+from dm_control.rl import control
+from local_dm_control_suite import base
+from local_dm_control_suite import common
+from dm_control.utils import containers
+from dm_control.utils import rewards
+from dm_control.utils import xml_tools
+
+from lxml import etree
+import numpy as np
+
+_CLOSE = .01    # (Meters) Distance below which a thing is considered close.
+_CONTROL_TIMESTEP = .01  # (Seconds)
+_TIME_LIMIT = 10  # (Seconds)
+_P_IN_HAND = .1  # Probabillity of object-in-hand initial state
+_P_IN_TARGET = .1  # Probabillity of object-in-target initial state
+_ARM_JOINTS = ['arm_root', 'arm_shoulder', 'arm_elbow', 'arm_wrist',
+               'finger', 'fingertip', 'thumb', 'thumbtip']
+_ALL_PROPS = frozenset(['ball', 'target_ball', 'cup',
+                        'peg', 'target_peg', 'slot'])
+
+SUITE = containers.TaggedTasks()
+
+
+def make_model(use_peg, insert):
+  """Returns a tuple containing the model XML string and a dict of assets."""
+  xml_string = common.read_model('manipulator.xml')
+  parser = etree.XMLParser(remove_blank_text=True)
+  mjcf = etree.XML(xml_string, parser)
+
+  # Select the desired prop.
+  if use_peg:
+    required_props = ['peg', 'target_peg']
+    if insert:
+      required_props += ['slot']
+  else:
+    required_props = ['ball', 'target_ball']
+    if insert:
+      required_props += ['cup']
+
+  # Remove unused props
+  for unused_prop in _ALL_PROPS.difference(required_props):
+    prop = xml_tools.find_element(mjcf, 'body', unused_prop)
+    prop.getparent().remove(prop)
+
+  return etree.tostring(mjcf, pretty_print=True), common.ASSETS
+
+
+@SUITE.add('benchmarking', 'hard')
+def bring_ball(fully_observable=True, time_limit=_TIME_LIMIT, random=None,
+               environment_kwargs=None):
+  """Returns manipulator bring task with the ball prop."""
+  use_peg = False
+  insert = False
+  physics = Physics.from_xml_string(*make_model(use_peg, insert))
+  task = Bring(use_peg=use_peg, insert=insert,
+               fully_observable=fully_observable, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit,
+      **environment_kwargs)
+
+
+@SUITE.add('hard')
+def bring_peg(fully_observable=True, time_limit=_TIME_LIMIT, random=None,
+              environment_kwargs=None):
+  """Returns manipulator bring task with the peg prop."""
+  use_peg = True
+  insert = False
+  physics = Physics.from_xml_string(*make_model(use_peg, insert))
+  task = Bring(use_peg=use_peg, insert=insert,
+               fully_observable=fully_observable, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit,
+      **environment_kwargs)
+
+
+@SUITE.add('hard')
+def insert_ball(fully_observable=True, time_limit=_TIME_LIMIT, random=None,
+                environment_kwargs=None):
+  """Returns manipulator insert task with the ball prop."""
+  use_peg = False
+  insert = True
+  physics = Physics.from_xml_string(*make_model(use_peg, insert))
+  task = Bring(use_peg=use_peg, insert=insert,
+               fully_observable=fully_observable, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit,
+      **environment_kwargs)
+
+
+@SUITE.add('hard')
+def insert_peg(fully_observable=True, time_limit=_TIME_LIMIT, random=None,
+               environment_kwargs=None):
+  """Returns manipulator insert task with the peg prop."""
+  use_peg = True
+  insert = True
+  physics = Physics.from_xml_string(*make_model(use_peg, insert))
+  task = Bring(use_peg=use_peg, insert=insert,
+               fully_observable=fully_observable, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit,
+      **environment_kwargs)
+
+
+class Physics(mujoco.Physics):
+  """Physics with additional features for the Planar Manipulator domain."""
+
+  def bounded_joint_pos(self, joint_names):
+    """Returns joint positions as (sin, cos) values."""
+    joint_pos = self.named.data.qpos[joint_names]
+    return np.vstack([np.sin(joint_pos), np.cos(joint_pos)]).T
+
+  def joint_vel(self, joint_names):
+    """Returns joint velocities."""
+    return self.named.data.qvel[joint_names]
+
+  def body_2d_pose(self, body_names, orientation=True):
+    """Returns positions and/or orientations of bodies."""
+    if not isinstance(body_names, str):
+      body_names = np.array(body_names).reshape(-1, 1)  # Broadcast indices.
+    pos = self.named.data.xpos[body_names, ['x', 'z']]
+    if orientation:
+      ori = self.named.data.xquat[body_names, ['qw', 'qy']]
+      return np.hstack([pos, ori])
+    else:
+      return pos
+
+  def touch(self):
+    return np.log1p(self.data.sensordata)
+
+  def site_distance(self, site1, site2):
+    site1_to_site2 = np.diff(self.named.data.site_xpos[[site2, site1]], axis=0)
+    return np.linalg.norm(site1_to_site2)
+
+
+class Bring(base.Task):
+  """A Bring `Task`: bring the prop to the target."""
+
+  def __init__(self, use_peg, insert, fully_observable, random=None):
+    """Initialize an instance of the `Bring` task.
+
+    Args:
+      use_peg: A `bool`, whether to replace the ball prop with the peg prop.
+      insert: A `bool`, whether to insert the prop in a receptacle.
+      fully_observable: A `bool`, whether the observation should contain the
+        position and velocity of the object being manipulated and the target
+        location.
+      random: Optional, either a `numpy.random.RandomState` instance, an
+        integer seed for creating a new `RandomState`, or None to select a seed
+        automatically (default).
+    """
+    self._use_peg = use_peg
+    self._target = 'target_peg' if use_peg else 'target_ball'
+    self._object = 'peg' if self._use_peg else 'ball'
+    self._object_joints = ['_'.join([self._object, dim]) for dim in 'xzy']
+    self._receptacle = 'slot' if self._use_peg else 'cup'
+    self._insert = insert
+    self._fully_observable = fully_observable
+    super(Bring, self).__init__(random=random)
+
+  def initialize_episode(self, physics):
+    """Sets the state of the environment at the start of each episode."""
+    # Local aliases
+    choice = self.random.choice
+    uniform = self.random.uniform
+    model = physics.named.model
+    data = physics.named.data
+
+    # Find a collision-free random initial configuration.
+    penetrating = True
+    while penetrating:
+
+      # Randomise angles of arm joints.
+      is_limited = model.jnt_limited[_ARM_JOINTS].astype(np.bool)
+      joint_range = model.jnt_range[_ARM_JOINTS]
+      lower_limits = np.where(is_limited, joint_range[:, 0], -np.pi)
+      upper_limits = np.where(is_limited, joint_range[:, 1], np.pi)
+      angles = uniform(lower_limits, upper_limits)
+      data.qpos[_ARM_JOINTS] = angles
+
+      # Symmetrize hand.
+      data.qpos['finger'] = data.qpos['thumb']
+
+      # Randomise target location.
+      target_x = uniform(-.4, .4)
+      target_z = uniform(.1, .4)
+      if self._insert:
+        target_angle = uniform(-np.pi/3, np.pi/3)
+        model.body_pos[self._receptacle, ['x', 'z']] = target_x, target_z
+        model.body_quat[self._receptacle, ['qw', 'qy']] = [
+            np.cos(target_angle/2), np.sin(target_angle/2)]
+      else:
+        target_angle = uniform(-np.pi, np.pi)
+
+      model.body_pos[self._target, ['x', 'z']] = target_x, target_z
+      model.body_quat[self._target, ['qw', 'qy']] = [
+          np.cos(target_angle/2), np.sin(target_angle/2)]
+
+      # Randomise object location.
+      object_init_probs = [_P_IN_HAND, _P_IN_TARGET, 1-_P_IN_HAND-_P_IN_TARGET]
+      init_type = choice(['in_hand', 'in_target', 'uniform'],
+                         p=object_init_probs)
+      if init_type == 'in_target':
+        object_x = target_x
+        object_z = target_z
+        object_angle = target_angle
+      elif init_type == 'in_hand':
+        physics.after_reset()
+        object_x = data.site_xpos['grasp', 'x']
+        object_z = data.site_xpos['grasp', 'z']
+        grasp_direction = data.site_xmat['grasp', ['xx', 'zx']]
+        object_angle = np.pi-np.arctan2(grasp_direction[1], grasp_direction[0])
+      else:
+        object_x = uniform(-.5, .5)
+        object_z = uniform(0, .7)
+        object_angle = uniform(0, 2*np.pi)
+        data.qvel[self._object + '_x'] = uniform(-5, 5)
+
+      data.qpos[self._object_joints] = object_x, object_z, object_angle
+
+      # Check for collisions.
+      physics.after_reset()
+      penetrating = physics.data.ncon > 0
+
+    super(Bring, self).initialize_episode(physics)
+
+  def get_observation(self, physics):
+    """Returns either features or only sensors (to be used with pixels)."""
+    obs = collections.OrderedDict()
+    obs['arm_pos'] = physics.bounded_joint_pos(_ARM_JOINTS)
+    obs['arm_vel'] = physics.joint_vel(_ARM_JOINTS)
+    obs['touch'] = physics.touch()
+    if self._fully_observable:
+      obs['hand_pos'] = physics.body_2d_pose('hand')
+      obs['object_pos'] = physics.body_2d_pose(self._object)
+      obs['object_vel'] = physics.joint_vel(self._object_joints)
+      obs['target_pos'] = physics.body_2d_pose(self._target)
+    return obs
+
+  def _is_close(self, distance):
+    return rewards.tolerance(distance, (0, _CLOSE), _CLOSE*2)
+
+  def _peg_reward(self, physics):
+    """Returns a reward for bringing the peg prop to the target."""
+    grasp = self._is_close(physics.site_distance('peg_grasp', 'grasp'))
+    pinch = self._is_close(physics.site_distance('peg_pinch', 'pinch'))
+    grasping = (grasp + pinch) / 2
+    bring = self._is_close(physics.site_distance('peg', 'target_peg'))
+    bring_tip = self._is_close(physics.site_distance('target_peg_tip',
+                                                     'peg_tip'))
+    bringing = (bring + bring_tip) / 2
+    return max(bringing, grasping/3)
+
+  def _ball_reward(self, physics):
+    """Returns a reward for bringing the ball prop to the target."""
+    return self._is_close(physics.site_distance('ball', 'target_ball'))
+
+  def get_reward(self, physics):
+    """Returns a reward to the agent."""
+    if self._use_peg:
+      return self._peg_reward(physics)
+    else:
+      return self._ball_reward(physics)
--- a/DPI/local_dm_control_suite/manipulator.xml
+++ b/DPI/local_dm_control_suite/manipulator.xml
@ -0,0 +1,211 @@
+<mujoco model="planar manipulator">
+
+  <include file="./common/visual.xml"/>
+  <include file="./common/skybox.xml"/>
+  <include file="./common/materials.xml"/>
+  <asset>
+    <texture name="background" builtin="flat" type="2d" mark="random" markrgb="1 1 1" width="800" height="800" rgb1=".2 .3 .4"/>
+    <material name="background" texture="background" texrepeat="1 1" texuniform="true"/>
+  </asset>
+
+  <visual>
+    <map shadowclip=".5"/>
+    <quality shadowsize="2048"/>
+  </visual>>
+
+  <option timestep="0.001" cone="elliptic"/>
+
+  <default>
+    <geom friction=".7" solimp="0.9 0.97 0.001" solref=".005 1"/>
+    <joint solimplimit="0 0.99 0.01" solreflimit=".005 1"/>
+    <general ctrllimited="true"/>
+    <tendon width="0.01"/>
+    <site size=".003 .003 .003" material="site" group="3"/>
+
+    <default class="arm">
+      <geom type="capsule" material="self" density="500"/>
+      <joint type="hinge" pos="0 0 0" axis="0 -1 0" limited="true"/>
+      <default class="hand">
+        <joint damping=".5" range="-10 60"/>
+        <geom size=".008"/>
+        <site  type="box" size=".018 .005 .005" pos=".022 0 -.002" euler="0 15 0" group="4"/>
+        <default class="fingertip">
+          <geom type="sphere" size=".008" material="effector"/>
+          <joint damping=".01" stiffness=".01" range="-40 20"/>
+          <site  size=".012 .005 .008" pos=".003 0 .003" group="4" euler="0 0 0"/>
+        </default>
+      </default>
+    </default>
+
+    <default class="object">
+      <geom material="self"/>
+    </default>
+
+    <default class="task">
+      <site rgba="0 0 0 0"/>
+    </default>
+
+    <default class="obstacle">
+      <geom material="decoration" friction="0"/>
+    </default>
+
+    <default class="ghost">
+      <geom material="target" contype="0" conaffinity="0"/>
+    </default>
+  </default>
+
+  <worldbody>
+    <!-- Arena -->
+    <light name="light" directional="true" diffuse=".6 .6 .6" pos="0 0 1" specular=".3 .3 .3"/>
+    <geom name="floor" type="plane" pos="0 0 0" size=".4 .2 10" material="grid"/>
+    <geom name="wall1" type="plane" pos="-.682843 0 .282843" size=".4 .2 10" material="grid" zaxis="1 0 1"/>
+    <geom name="wall2" type="plane" pos=".682843 0 .282843" size=".4 .2 10" material="grid" zaxis="-1 0 1"/>
+    <geom name="background" type="plane" pos="0 .2 .5" size="1 .5 10" material="background" zaxis="0 -1 0"/>
+    <camera name="fixed" pos="0 -16 .4" xyaxes="1 0 0 0 0 1" fovy="4"/>
+
+    <!-- Arm -->
+    <geom name="arm_root" type="cylinder" fromto="0 -.022 .4 0 .022 .4" size=".024"
+          material="decoration" contype="0" conaffinity="0"/>
+    <body name="upper_arm" pos="0 0 .4" childclass="arm">
+      <joint name="arm_root" damping="2" limited="false"/>
+      <geom  name="upper_arm"  size=".02" fromto="0 0 0 0 0 .18"/>
+      <body  name="middle_arm" pos="0 0 .18" childclass="arm">
+        <joint name="arm_shoulder" damping="1.5" range="-160 160"/>
+        <geom  name="middle_arm"  size=".017" fromto="0 0 0 0 0 .15"/>
+        <body  name="lower_arm" pos="0 0 .15">
+          <joint name="arm_elbow" damping="1" range="-160 160"/>
+          <geom  name="lower_arm" size=".014" fromto="0 0 0 0 0 .12"/>
+          <body  name="hand" pos="0 0 .12">
+            <joint name="arm_wrist" damping=".5" range="-140 140" />
+            <geom  name="hand" size=".011" fromto="0 0 0 0 0 .03"/>
+            <geom  name="palm1"  fromto="0 0 .03  .03 0 .045" class="hand"/>
+            <geom  name="palm2"  fromto="0 0 .03 -.03 0 .045" class="hand"/>
+            <site  name="grasp" pos="0 0 .065"/>
+            <body  name="pinch site" pos="0 0 .090">
+              <site  name="pinch"/>
+              <inertial pos="0 0 0" mass="1e-6" diaginertia="1e-12 1e-12 1e-12"/>
+              <camera name="hand" pos="0 -.3 0" xyaxes="1 0 0 0 0 1" mode="track"/>
+            </body>
+            <site  name="palm_touch" type="box" group="4" size=".025 .005 .008" pos="0 0 .043"/>
+
+            <body name="thumb" pos=".03 0 .045" euler="0 -90 0" childclass="hand">
+              <joint name="thumb"/>
+              <geom  name="thumb1"  fromto="0 0 0 .02 0 -.01" size=".007"/>
+              <geom  name="thumb2"  fromto=".02 0 -.01 .04 0 -.01" size=".007"/>
+              <site  name="thumb_touch" group="4"/>
+              <body  name="thumbtip" pos=".05 0 -.01" childclass="fingertip">
+                <joint name="thumbtip"/>
+                <geom  name="thumbtip1" pos="-.003 0 0" />
+                <geom  name="thumbtip2" pos=".003 0 0" />
+                <site  name="thumbtip_touch" group="4"/>
+              </body>
+            </body>
+
+            <body name="finger" pos="-.03 0 .045" euler="0 90 180" childclass="hand">
+              <joint name="finger"/>
+              <geom  name="finger1"  fromto="0 0 0 .02 0 -.01" size=".007" />
+              <geom  name="finger2"  fromto=".02 0 -.01 .04 0 -.01" size=".007"/>
+              <site  name="finger_touch"/>
+              <body  name="fingertip" pos=".05 0 -.01" childclass="fingertip">
+                <joint name="fingertip"/>
+                <geom  name="fingertip1" pos="-.003 0 0" />
+                <geom  name="fingertip2" pos=".003 0 0" />
+                <site  name="fingertip_touch"/>
+              </body>
+            </body>
+          </body>
+        </body>
+      </body>
+    </body>
+
+    <!-- props -->
+    <body name="ball" pos=".4 0 .4" childclass="object">
+      <joint name="ball_x" type="slide" axis="1 0 0" ref=".4"/>
+      <joint name="ball_z" type="slide" axis="0 0 1" ref=".4"/>
+      <joint name="ball_y" type="hinge" axis="0 1 0"/>
+      <geom  name="ball" type="sphere" size=".022" />
+      <site  name="ball" type="sphere"/>
+    </body>
+
+    <body name="peg" pos="-.4 0 .4" childclass="object">
+      <joint name="peg_x" type="slide" axis="1 0 0" ref="-.4"/>
+      <joint name="peg_z" type="slide" axis="0 0 1" ref=".4"/>
+      <joint name="peg_y" type="hinge" axis="0 1 0"/>
+      <geom name="blade" type="capsule" size=".005" fromto="0 0 -.013 0 0 -.113"/>
+      <geom name="guard" type="capsule" size=".005" fromto="-.017 0 -.043 .017 0 -.043"/>
+      <body name="pommel" pos="0 0 -.013">
+        <geom name="pommel" type="sphere" size=".009"/>
+      </body>
+      <site name="peg" type="box" pos="0 0 -.063"/>
+      <site name="peg_pinch" type="box" pos="0 0 -.025"/>
+      <site name="peg_grasp" type="box" pos="0 0 0"/>
+      <site name="peg_tip"   type="box" pos="0 0 -.113"/>
+    </body>
+
+    <!-- receptacles -->
+    <body name="slot" pos="-.405 0 .2" euler="0 20 0" childclass="obstacle">
+      <geom name="slot_0" type="box" pos="-.0252 0 -.083" size=".0198 .01 .035"/>
+      <geom name="slot_1" type="box" pos=" .0252 0 -.083" size=".0198 .01 .035"/>
+      <geom name="slot_2" type="box" pos="  0   0 -.138" size=".045 .01 .02"/>
+      <site name="slot" type="box" pos="0 0 0"/>
+      <site name="slot_end" type="box" pos="0 0 -.05"/>
+    </body>
+
+    <body name="cup" pos=".3 0 .4" euler="0 -15 0" childclass="obstacle">
+      <geom name="cup_0" type="capsule" size=".008" fromto="-.03 0 .06 -.03 0 -.015" />
+      <geom name="cup_1" type="capsule" size=".008" fromto="-.03 0 -.015 0 0 -.04" />
+      <geom name="cup_2" type="capsule" size=".008" fromto="0 0 -.04 .03 0 -.015" />
+      <geom name="cup_3" type="capsule" size=".008" fromto=".03 0 -.015 .03 0 .06" />
+      <site name="cup" size=".005"/>
+    </body>
+
+    <!-- targets -->
+    <body name="target_ball" pos=".4 .001 .4" childclass="ghost">
+      <geom  name="target_ball" type="sphere" size=".02" />
+      <site  name="target_ball" type="sphere"/>
+    </body>
+
+    <body name="target_peg" pos="-.2 .001 .4" childclass="ghost">
+      <geom name="target_blade" type="capsule" size=".005" fromto="0 0 -.013 0 0 -.113"/>
+      <geom name="target_guard" type="capsule" size=".005" fromto="-.017 0 -.043 .017 0 -.043"/>
+      <geom name="target_pommel" type="sphere" size=".009" pos="0 0 -.013"/>
+      <site name="target_peg" type="box" pos="0 0 -.063"/>
+      <site name="target_peg_pinch" type="box" pos="0 0 -.025"/>
+      <site name="target_peg_grasp" type="box" pos="0 0 0"/>
+      <site name="target_peg_tip"   type="box" pos="0 0 -.113"/>
+    </body>
+
+  </worldbody>
+
+  <tendon>
+    <fixed name="grasp">
+      <joint joint="thumb"  coef=".5"/>
+      <joint joint="finger" coef=".5"/>
+    </fixed>
+    <fixed name="coupling">
+      <joint joint="thumb"  coef="-.5"/>
+      <joint joint="finger" coef=".5"/>
+    </fixed>
+  </tendon>
+
+  <equality>
+    <tendon name="coupling" tendon1="coupling" solimp="0.95 0.99 0.001" solref=".005 .5"/>
+  </equality>
+
+  <sensor>
+    <touch name="palm_touch" site="palm_touch"/>
+    <touch name="finger_touch" site="finger_touch"/>
+    <touch name="thumb_touch" site="thumb_touch"/>
+    <touch name="fingertip_touch" site="fingertip_touch"/>
+    <touch name="thumbtip_touch" site="thumbtip_touch"/>
+  </sensor>
+
+  <actuator>
+    <motor name="root"     joint="arm_root"     ctrlrange="-1 1"  gear="12"/>
+    <motor name="shoulder" joint="arm_shoulder" ctrlrange="-1 1"  gear="8"/>
+    <motor name="elbow"    joint="arm_elbow"    ctrlrange="-1 1"  gear="4"/>
+    <motor name="wrist"    joint="arm_wrist"    ctrlrange="-1 1"  gear="2"/>
+    <motor name="grasp"    tendon="grasp"       ctrlrange="-1 1"  gear="2"/>
+  </actuator>
+
+</mujoco>
--- a/DPI/local_dm_control_suite/pendulum.py
+++ b/DPI/local_dm_control_suite/pendulum.py
@ -0,0 +1,114 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Pendulum domain."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+from dm_control import mujoco
+from dm_control.rl import control
+from local_dm_control_suite import base
+from local_dm_control_suite import common
+from dm_control.utils import containers
+from dm_control.utils import rewards
+import numpy as np
+
+
+_DEFAULT_TIME_LIMIT = 20
+_ANGLE_BOUND = 8
+_COSINE_BOUND = np.cos(np.deg2rad(_ANGLE_BOUND))
+SUITE = containers.TaggedTasks()
+
+
+def get_model_and_assets():
+  """Returns a tuple containing the model XML string and a dict of assets."""
+  return common.read_model('pendulum.xml'), common.ASSETS
+
+
+@SUITE.add('benchmarking')
+def swingup(time_limit=_DEFAULT_TIME_LIMIT, random=None,
+            environment_kwargs=None):
+  """Returns pendulum swingup task ."""
+  physics = Physics.from_xml_string(*get_model_and_assets())
+  task = SwingUp(random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, **environment_kwargs)
+
+
+class Physics(mujoco.Physics):
+  """Physics simulation with additional features for the Pendulum domain."""
+
+  def pole_vertical(self):
+    """Returns vertical (z) component of pole frame."""
+    return self.named.data.xmat['pole', 'zz']
+
+  def angular_velocity(self):
+    """Returns the angular velocity of the pole."""
+    return self.named.data.qvel['hinge'].copy()
+
+  def pole_orientation(self):
+    """Returns both horizontal and vertical components of pole frame."""
+    return self.named.data.xmat['pole', ['zz', 'xz']]
+
+
+class SwingUp(base.Task):
+  """A Pendulum `Task` to swing up and balance the pole."""
+
+  def __init__(self, random=None):
+    """Initialize an instance of `Pendulum`.
+
+    Args:
+      random: Optional, either a `numpy.random.RandomState` instance, an
+        integer seed for creating a new `RandomState`, or None to select a seed
+        automatically (default).
+    """
+    super(SwingUp, self).__init__(random=random)
+
+  def initialize_episode(self, physics):
+    """Sets the state of the environment at the start of each episode.
+
+    Pole is set to a random angle between [-pi, pi).
+
+    Args:
+      physics: An instance of `Physics`.
+
+    """
+    physics.named.data.qpos['hinge'] = self.random.uniform(-np.pi, np.pi)
+    super(SwingUp, self).initialize_episode(physics)
+
+  def get_observation(self, physics):
+    """Returns an observation.
+
+    Observations are states concatenating pole orientation and angular velocity
+    and pixels from fixed camera.
+
+    Args:
+      physics: An instance of `physics`, Pendulum physics.
+
+    Returns:
+      A `dict` of observation.
+    """
+    obs = collections.OrderedDict()
+    obs['orientation'] = physics.pole_orientation()
+    obs['velocity'] = physics.angular_velocity()
+    return obs
+
+  def get_reward(self, physics):
+    return rewards.tolerance(physics.pole_vertical(), (_COSINE_BOUND, 1))
--- a/DPI/local_dm_control_suite/pendulum.xml
+++ b/DPI/local_dm_control_suite/pendulum.xml
@ -0,0 +1,26 @@
+<mujoco model="pendulum">
+  <include file="./common/visual.xml"/>
+  <include file="./common/skybox.xml"/>
+  <include file="./common/materials.xml"/>
+
+  <option timestep="0.02">
+    <flag contact="disable" energy="enable"/>
+  </option>
+
+  <worldbody>
+    <light name="light" pos="0 0 2"/>
+    <geom name="floor" size="2 2 .2" type="plane" material="grid"/>
+    <camera name="fixed" pos="0 -1.5 2" xyaxes='1 0 0 0 1 1'/>
+    <camera name="lookat" mode="targetbodycom" target="pole" pos="0 -2 1"/>
+    <body name="pole" pos="0 0 .6">
+      <joint name="hinge" type="hinge" axis="0 1 0" damping="0.1"/>
+      <geom name="base" material="decoration" type="cylinder" fromto="0 -.03 0 0 .03 0" size="0.021" mass="0"/>
+      <geom name="pole" material="self" type="capsule" fromto="0 0 0 0 0 0.5" size="0.02" mass="0"/>
+      <geom name="mass" material="effector" type="sphere" pos="0 0 0.5" size="0.05" mass="1"/>
+    </body>
+  </worldbody>
+
+  <actuator>
+    <motor name="torque" joint="hinge" gear="1" ctrlrange="-1 1" ctrllimited="true"/>
+  </actuator>
+</mujoco>
--- a/DPI/local_dm_control_suite/point_mass.py
+++ b/DPI/local_dm_control_suite/point_mass.py
@ -0,0 +1,130 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Point-mass domain."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+from dm_control import mujoco
+from dm_control.rl import control
+from local_dm_control_suite import base
+from local_dm_control_suite import common
+from dm_control.suite.utils import randomizers
+from dm_control.utils import containers
+from dm_control.utils import rewards
+import numpy as np
+
+_DEFAULT_TIME_LIMIT = 20
+SUITE = containers.TaggedTasks()
+
+
+def get_model_and_assets():
+  """Returns a tuple containing the model XML string and a dict of assets."""
+  return common.read_model('point_mass.xml'), common.ASSETS
+
+
+@SUITE.add('benchmarking', 'easy')
+def easy(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Returns the easy point_mass task."""
+  physics = Physics.from_xml_string(*get_model_and_assets())
+  task = PointMass(randomize_gains=False, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, **environment_kwargs)
+
+
+@SUITE.add()
+def hard(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Returns the hard point_mass task."""
+  physics = Physics.from_xml_string(*get_model_and_assets())
+  task = PointMass(randomize_gains=True, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, **environment_kwargs)
+
+
+class Physics(mujoco.Physics):
+  """physics for the point_mass domain."""
+
+  def mass_to_target(self):
+    """Returns the vector from mass to target in global coordinate."""
+    return (self.named.data.geom_xpos['target'] -
+            self.named.data.geom_xpos['pointmass'])
+
+  def mass_to_target_dist(self):
+    """Returns the distance from mass to the target."""
+    return np.linalg.norm(self.mass_to_target())
+
+
+class PointMass(base.Task):
+  """A point_mass `Task` to reach target with smooth reward."""
+
+  def __init__(self, randomize_gains, random=None):
+    """Initialize an instance of `PointMass`.
+
+    Args:
+      randomize_gains: A `bool`, whether to randomize the actuator gains.
+      random: Optional, either a `numpy.random.RandomState` instance, an
+        integer seed for creating a new `RandomState`, or None to select a seed
+        automatically (default).
+    """
+    self._randomize_gains = randomize_gains
+    super(PointMass, self).__init__(random=random)
+
+  def initialize_episode(self, physics):
+    """Sets the state of the environment at the start of each episode.
+
+       If _randomize_gains is True, the relationship between the controls and
+       the joints is randomized, so that each control actuates a random linear
+       combination of joints.
+
+    Args:
+      physics: An instance of `mujoco.Physics`.
+    """
+    randomizers.randomize_limited_and_rotational_joints(physics, self.random)
+    if self._randomize_gains:
+      dir1 = self.random.randn(2)
+      dir1 /= np.linalg.norm(dir1)
+      # Find another actuation direction that is not 'too parallel' to dir1.
+      parallel = True
+      while parallel:
+        dir2 = self.random.randn(2)
+        dir2 /= np.linalg.norm(dir2)
+        parallel = abs(np.dot(dir1, dir2)) > 0.9
+      physics.model.wrap_prm[[0, 1]] = dir1
+      physics.model.wrap_prm[[2, 3]] = dir2
+    super(PointMass, self).initialize_episode(physics)
+
+  def get_observation(self, physics):
+    """Returns an observation of the state."""
+    obs = collections.OrderedDict()
+    obs['position'] = physics.position()
+    obs['velocity'] = physics.velocity()
+    return obs
+
+  def get_reward(self, physics):
+    """Returns a reward to the agent."""
+    target_size = physics.named.model.geom_size['target', 0]
+    near_target = rewards.tolerance(physics.mass_to_target_dist(),
+                                    bounds=(0, target_size), margin=target_size)
+    control_reward = rewards.tolerance(physics.control(), margin=1,
+                                       value_at_margin=0,
+                                       sigmoid='quadratic').mean()
+    small_control = (control_reward + 4) / 5
+    return near_target * small_control
--- a/DPI/local_dm_control_suite/point_mass.xml
+++ b/DPI/local_dm_control_suite/point_mass.xml
@ -0,0 +1,49 @@
+<mujoco model="planar point mass">
+  <include file="./common/skybox.xml"/>
+  <include file="./common/visual.xml"/>
+  <include file="./common/materials.xml"/>
+
+  <option timestep="0.02">
+    <flag contact="disable"/>
+  </option>
+
+  <default>
+    <joint type="hinge" axis="0 0 1" limited="true" range="-.29 .29" damping="1"/>
+    <motor gear=".1" ctrlrange="-1 1" ctrllimited="true"/>
+  </default>
+
+  <worldbody>
+    <light name="light" pos="0 0 1"/>
+    <camera name="fixed" pos="0 0 .75" quat="1 0 0 0"/>
+    <geom name="ground" type="plane" pos="0 0 0" size=".3 .3 .1" material="grid"/>
+    <geom name="wall_x" type="plane" pos="-.3 0 .02" zaxis="1 0 0"  size=".02 .3 .02" material="decoration"/>
+    <geom name="wall_y" type="plane" pos="0 -.3 .02" zaxis="0 1 0"  size=".3 .02 .02" material="decoration"/>
+    <geom name="wall_neg_x" type="plane" pos=".3 0 .02" zaxis="-1 0 0"  size=".02 .3 .02" material="decoration"/>
+    <geom name="wall_neg_y" type="plane" pos="0 .3 .02" zaxis="0 -1 0"  size=".3 .02 .02" material="decoration"/>
+
+    <body name="pointmass" pos="0 0 .01">
+      <camera name="cam0" pos="0 -0.3 0.3" xyaxes="1 0 0 0 0.7 0.7"/>
+      <joint name="root_x" type="slide"  pos="0 0 0" axis="1 0 0" />
+      <joint name="root_y" type="slide"  pos="0 0 0" axis="0 1 0" />
+      <geom name="pointmass" type="sphere" size=".01" material="self" mass=".3"/>
+    </body>
+
+    <geom name="target" pos="0 0 .01" material="target" type="sphere" size=".015"/>
+  </worldbody>
+
+  <tendon>
+    <fixed name="t1">
+      <joint joint="root_x" coef="1"/>
+      <joint joint="root_y" coef="0"/>
+    </fixed>
+    <fixed name="t2">
+      <joint joint="root_x" coef="0"/>
+      <joint joint="root_y" coef="1"/>
+    </fixed>
+  </tendon>
+
+  <actuator>
+    <motor name="t1" tendon="t1"/>
+    <motor name="t2" tendon="t2"/>
+  </actuator>
+</mujoco>
--- a/DPI/local_dm_control_suite/quadruped.py
+++ b/DPI/local_dm_control_suite/quadruped.py
@ -0,0 +1,480 @@
+# Copyright 2019 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Quadruped Domain."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+from dm_control import mujoco
+from dm_control.mujoco.wrapper import mjbindings
+from dm_control.rl import control
+from local_dm_control_suite import base
+from local_dm_control_suite import common
+from dm_control.utils import containers
+from dm_control.utils import rewards
+from dm_control.utils import xml_tools
+
+from lxml import etree
+import numpy as np
+from scipy import ndimage
+
+enums = mjbindings.enums
+mjlib = mjbindings.mjlib
+
+
+_DEFAULT_TIME_LIMIT = 20
+_CONTROL_TIMESTEP = .02
+
+# Horizontal speeds above which the move reward is 1.
+_RUN_SPEED = 5
+_WALK_SPEED = 0.5
+
+# Constants related to terrain generation.
+_HEIGHTFIELD_ID = 0
+_TERRAIN_SMOOTHNESS = 0.15  # 0.0: maximally bumpy; 1.0: completely smooth.
+_TERRAIN_BUMP_SCALE = 2  # Spatial scale of terrain bumps (in meters).
+
+# Named model elements.
+_TOES = ['toe_front_left', 'toe_back_left', 'toe_back_right', 'toe_front_right']
+_WALLS = ['wall_px', 'wall_py', 'wall_nx', 'wall_ny']
+
+SUITE = containers.TaggedTasks()
+
+
+def make_model(floor_size=None, terrain=False, rangefinders=False,
+               walls_and_ball=False):
+  """Returns the model XML string."""
+  xml_string = common.read_model('quadruped.xml')
+  parser = etree.XMLParser(remove_blank_text=True)
+  mjcf = etree.XML(xml_string, parser)
+
+  # Set floor size.
+  if floor_size is not None:
+    floor_geom = mjcf.find('.//geom[@name={!r}]'.format('floor'))
+    floor_geom.attrib['size'] = '{} {} .5'.format(floor_size, floor_size)
+
+  # Remove walls, ball and target.
+  if not walls_and_ball:
+    for wall in _WALLS:
+      wall_geom = xml_tools.find_element(mjcf, 'geom', wall)
+      wall_geom.getparent().remove(wall_geom)
+
+    # Remove ball.
+    ball_body = xml_tools.find_element(mjcf, 'body', 'ball')
+    ball_body.getparent().remove(ball_body)
+
+    # Remove target.
+    target_site = xml_tools.find_element(mjcf, 'site', 'target')
+    target_site.getparent().remove(target_site)
+
+  # Remove terrain.
+  if not terrain:
+    terrain_geom = xml_tools.find_element(mjcf, 'geom', 'terrain')
+    terrain_geom.getparent().remove(terrain_geom)
+
+  # Remove rangefinders if they're not used, as range computations can be
+  # expensive, especially in a scene with heightfields.
+  if not rangefinders:
+    rangefinder_sensors = mjcf.findall('.//rangefinder')
+    for rf in rangefinder_sensors:
+      rf.getparent().remove(rf)
+
+  return etree.tostring(mjcf, pretty_print=True)
+
+
+@SUITE.add()
+def walk(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Returns the Walk task."""
+  xml_string = make_model(floor_size=_DEFAULT_TIME_LIMIT * _WALK_SPEED)
+  physics = Physics.from_xml_string(xml_string, common.ASSETS)
+  task = Move(desired_speed=_WALK_SPEED, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(physics, task, time_limit=time_limit,
+                             control_timestep=_CONTROL_TIMESTEP,
+                             **environment_kwargs)
+
+
+@SUITE.add()
+def run(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Returns the Run task."""
+  xml_string = make_model(floor_size=_DEFAULT_TIME_LIMIT * _RUN_SPEED)
+  physics = Physics.from_xml_string(xml_string, common.ASSETS)
+  task = Move(desired_speed=_RUN_SPEED, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(physics, task, time_limit=time_limit,
+                             control_timestep=_CONTROL_TIMESTEP,
+                             **environment_kwargs)
+
+
+@SUITE.add()
+def escape(time_limit=_DEFAULT_TIME_LIMIT, random=None,
+           environment_kwargs=None):
+  """Returns the Escape task."""
+  xml_string = make_model(floor_size=40, terrain=True, rangefinders=True)
+  physics = Physics.from_xml_string(xml_string, common.ASSETS)
+  task = Escape(random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(physics, task, time_limit=time_limit,
+                             control_timestep=_CONTROL_TIMESTEP,
+                             **environment_kwargs)
+
+
+@SUITE.add()
+def fetch(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Returns the Fetch task."""
+  xml_string = make_model(walls_and_ball=True)
+  physics = Physics.from_xml_string(xml_string, common.ASSETS)
+  task = Fetch(random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(physics, task, time_limit=time_limit,
+                             control_timestep=_CONTROL_TIMESTEP,
+                             **environment_kwargs)
+
+
+class Physics(mujoco.Physics):
+  """Physics simulation with additional features for the Quadruped domain."""
+
+  def _reload_from_data(self, data):
+    super(Physics, self)._reload_from_data(data)
+    # Clear cached sensor names when the physics is reloaded.
+    self._sensor_types_to_names = {}
+    self._hinge_names = []
+
+  def _get_sensor_names(self, *sensor_types):
+    try:
+      sensor_names = self._sensor_types_to_names[sensor_types]
+    except KeyError:
+      [sensor_ids] = np.where(np.in1d(self.model.sensor_type, sensor_types))
+      sensor_names = [self.model.id2name(s_id, 'sensor') for s_id in sensor_ids]
+      self._sensor_types_to_names[sensor_types] = sensor_names
+    return sensor_names
+
+  def torso_upright(self):
+    """Returns the dot-product of the torso z-axis and the global z-axis."""
+    return np.asarray(self.named.data.xmat['torso', 'zz'])
+
+  def torso_velocity(self):
+    """Returns the velocity of the torso, in the local frame."""
+    return self.named.data.sensordata['velocimeter'].copy()
+
+  def egocentric_state(self):
+    """Returns the state without global orientation or position."""
+    if not self._hinge_names:
+      [hinge_ids] = np.nonzero(self.model.jnt_type ==
+                               enums.mjtJoint.mjJNT_HINGE)
+      self._hinge_names = [self.model.id2name(j_id, 'joint')
+                           for j_id in hinge_ids]
+    return np.hstack((self.named.data.qpos[self._hinge_names],
+                      self.named.data.qvel[self._hinge_names],
+                      self.data.act))
+
+  def toe_positions(self):
+    """Returns toe positions in egocentric frame."""
+    torso_frame = self.named.data.xmat['torso'].reshape(3, 3)
+    torso_pos = self.named.data.xpos['torso']
+    torso_to_toe = self.named.data.xpos[_TOES] - torso_pos
+    return torso_to_toe.dot(torso_frame)
+
+  def force_torque(self):
+    """Returns scaled force/torque sensor readings at the toes."""
+    force_torque_sensors = self._get_sensor_names(enums.mjtSensor.mjSENS_FORCE,
+                                                  enums.mjtSensor.mjSENS_TORQUE)
+    return np.arcsinh(self.named.data.sensordata[force_torque_sensors])
+
+  def imu(self):
+    """Returns IMU-like sensor readings."""
+    imu_sensors = self._get_sensor_names(enums.mjtSensor.mjSENS_GYRO,
+                                         enums.mjtSensor.mjSENS_ACCELEROMETER)
+    return self.named.data.sensordata[imu_sensors]
+
+  def rangefinder(self):
+    """Returns scaled rangefinder sensor readings."""
+    rf_sensors = self._get_sensor_names(enums.mjtSensor.mjSENS_RANGEFINDER)
+    rf_readings = self.named.data.sensordata[rf_sensors]
+    no_intersection = -1.0
+    return np.where(rf_readings == no_intersection, 1.0, np.tanh(rf_readings))
+
+  def origin_distance(self):
+    """Returns the distance from the origin to the workspace."""
+    return np.asarray(np.linalg.norm(self.named.data.site_xpos['workspace']))
+
+  def origin(self):
+    """Returns origin position in the torso frame."""
+    torso_frame = self.named.data.xmat['torso'].reshape(3, 3)
+    torso_pos = self.named.data.xpos['torso']
+    return -torso_pos.dot(torso_frame)
+
+  def ball_state(self):
+    """Returns ball position and velocity relative to the torso frame."""
+    data = self.named.data
+    torso_frame = data.xmat['torso'].reshape(3, 3)
+    ball_rel_pos = data.xpos['ball'] - data.xpos['torso']
+    ball_rel_vel = data.qvel['ball_root'][:3] - data.qvel['root'][:3]
+    ball_rot_vel = data.qvel['ball_root'][3:]
+    ball_state = np.vstack((ball_rel_pos, ball_rel_vel, ball_rot_vel))
+    return ball_state.dot(torso_frame).ravel()
+
+  def target_position(self):
+    """Returns target position in torso frame."""
+    torso_frame = self.named.data.xmat['torso'].reshape(3, 3)
+    torso_pos = self.named.data.xpos['torso']
+    torso_to_target = self.named.data.site_xpos['target'] - torso_pos
+    return torso_to_target.dot(torso_frame)
+
+  def ball_to_target_distance(self):
+    """Returns horizontal distance from the ball to the target."""
+    ball_to_target = (self.named.data.site_xpos['target'] -
+                      self.named.data.xpos['ball'])
+    return np.linalg.norm(ball_to_target[:2])
+
+  def self_to_ball_distance(self):
+    """Returns horizontal distance from the quadruped workspace to the ball."""
+    self_to_ball = (self.named.data.site_xpos['workspace']
+                    -self.named.data.xpos['ball'])
+    return np.linalg.norm(self_to_ball[:2])
+
+
+def _find_non_contacting_height(physics, orientation, x_pos=0.0, y_pos=0.0):
+  """Find a height with no contacts given a body orientation.
+
+  Args:
+    physics: An instance of `Physics`.
+    orientation: A quaternion.
+    x_pos: A float. Position along global x-axis.
+    y_pos: A float. Position along global y-axis.
+  Raises:
+    RuntimeError: If a non-contacting configuration has not been found after
+    10,000 attempts.
+  """
+  z_pos = 0.0  # Start embedded in the floor.
+  num_contacts = 1
+  num_attempts = 0
+  # Move up in 1cm increments until no contacts.
+  while num_contacts > 0:
+    try:
+      with physics.reset_context():
+        physics.named.data.qpos['root'][:3] = x_pos, y_pos, z_pos
+        physics.named.data.qpos['root'][3:] = orientation
+    except control.PhysicsError:
+      # We may encounter a PhysicsError here due to filling the contact
+      # buffer, in which case we simply increment the height and continue.
+      pass
+    num_contacts = physics.data.ncon
+    z_pos += 0.01
+    num_attempts += 1
+    if num_attempts > 10000:
+      raise RuntimeError('Failed to find a non-contacting configuration.')
+
+
+def _common_observations(physics):
+  """Returns the observations common to all tasks."""
+  obs = collections.OrderedDict()
+  obs['egocentric_state'] = physics.egocentric_state()
+  obs['torso_velocity'] = physics.torso_velocity()
+  obs['torso_upright'] = physics.torso_upright()
+  obs['imu'] = physics.imu()
+  obs['force_torque'] = physics.force_torque()
+  return obs
+
+
+def _upright_reward(physics, deviation_angle=0):
+  """Returns a reward proportional to how upright the torso is.
+
+  Args:
+    physics: an instance of `Physics`.
+    deviation_angle: A float, in degrees. The reward is 0 when the torso is
+      exactly upside-down and 1 when the torso's z-axis is less than
+      `deviation_angle` away from the global z-axis.
+  """
+  deviation = np.cos(np.deg2rad(deviation_angle))
+  return rewards.tolerance(
+      physics.torso_upright(),
+      bounds=(deviation, float('inf')),
+      sigmoid='linear',
+      margin=1 + deviation,
+      value_at_margin=0)
+
+
+class Move(base.Task):
+  """A quadruped task solved by moving forward at a designated speed."""
+
+  def __init__(self, desired_speed, random=None):
+    """Initializes an instance of `Move`.
+
+    Args:
+      desired_speed: A float. If this value is zero, reward is given simply
+        for standing upright. Otherwise this specifies the horizontal velocity
+        at which the velocity-dependent reward component is maximized.
+      random: Optional, either a `numpy.random.RandomState` instance, an
+        integer seed for creating a new `RandomState`, or None to select a seed
+        automatically (default).
+    """
+    self._desired_speed = desired_speed
+    super(Move, self).__init__(random=random)
+
+  def initialize_episode(self, physics):
+    """Sets the state of the environment at the start of each episode.
+
+    Args:
+      physics: An instance of `Physics`.
+
+    """
+    # Initial configuration.
+    orientation = self.random.randn(4)
+    orientation /= np.linalg.norm(orientation)
+    _find_non_contacting_height(physics, orientation)
+    super(Move, self).initialize_episode(physics)
+
+  def get_observation(self, physics):
+    """Returns an observation to the agent."""
+    return _common_observations(physics)
+
+  def get_reward(self, physics):
+    """Returns a reward to the agent."""
+
+    # Move reward term.
+    move_reward = rewards.tolerance(
+        physics.torso_velocity()[0],
+        bounds=(self._desired_speed, float('inf')),
+        margin=self._desired_speed,
+        value_at_margin=0.5,
+        sigmoid='linear')
+
+    return _upright_reward(physics) * move_reward
+
+
+class Escape(base.Task):
+  """A quadruped task solved by escaping a bowl-shaped terrain."""
+
+  def initialize_episode(self, physics):
+    """Sets the state of the environment at the start of each episode.
+
+    Args:
+      physics: An instance of `Physics`.
+
+    """
+    # Get heightfield resolution, assert that it is square.
+    res = physics.model.hfield_nrow[_HEIGHTFIELD_ID]
+    assert res == physics.model.hfield_ncol[_HEIGHTFIELD_ID]
+    # Sinusoidal bowl shape.
+    row_grid, col_grid = np.ogrid[-1:1:res*1j, -1:1:res*1j]
+    radius = np.clip(np.sqrt(col_grid**2 + row_grid**2), .04, 1)
+    bowl_shape = .5 - np.cos(2*np.pi*radius)/2
+    # Random smooth bumps.
+    terrain_size = 2 * physics.model.hfield_size[_HEIGHTFIELD_ID, 0]
+    bump_res = int(terrain_size / _TERRAIN_BUMP_SCALE)
+    bumps = self.random.uniform(_TERRAIN_SMOOTHNESS, 1, (bump_res, bump_res))
+    smooth_bumps = ndimage.zoom(bumps, res / float(bump_res))
+    # Terrain is elementwise product.
+    terrain = bowl_shape * smooth_bumps
+    start_idx = physics.model.hfield_adr[_HEIGHTFIELD_ID]
+    physics.model.hfield_data[start_idx:start_idx+res**2] = terrain.ravel()
+    super(Escape, self).initialize_episode(physics)
+
+    # If we have a rendering context, we need to re-upload the modified
+    # heightfield data.
+    if physics.contexts:
+      with physics.contexts.gl.make_current() as ctx:
+        ctx.call(mjlib.mjr_uploadHField,
+                 physics.model.ptr,
+                 physics.contexts.mujoco.ptr,
+                 _HEIGHTFIELD_ID)
+
+    # Initial configuration.
+    orientation = self.random.randn(4)
+    orientation /= np.linalg.norm(orientation)
+    _find_non_contacting_height(physics, orientation)
+
+  def get_observation(self, physics):
+    """Returns an observation to the agent."""
+    obs = _common_observations(physics)
+    obs['origin'] = physics.origin()
+    obs['rangefinder'] = physics.rangefinder()
+    return obs
+
+  def get_reward(self, physics):
+    """Returns a reward to the agent."""
+
+    # Escape reward term.
+    terrain_size = physics.model.hfield_size[_HEIGHTFIELD_ID, 0]
+    escape_reward = rewards.tolerance(
+        physics.origin_distance(),
+        bounds=(terrain_size, float('inf')),
+        margin=terrain_size,
+        value_at_margin=0,
+        sigmoid='linear')
+
+    return _upright_reward(physics, deviation_angle=20) * escape_reward
+
+
+class Fetch(base.Task):
+  """A quadruped task solved by bringing a ball to the origin."""
+
+  def initialize_episode(self, physics):
+    """Sets the state of the environment at the start of each episode.
+
+    Args:
+      physics: An instance of `Physics`.
+
+    """
+    # Initial configuration, random azimuth and horizontal position.
+    azimuth = self.random.uniform(0, 2*np.pi)
+    orientation = np.array((np.cos(azimuth/2), 0, 0, np.sin(azimuth/2)))
+    spawn_radius = 0.9 * physics.named.model.geom_size['floor', 0]
+    x_pos, y_pos = self.random.uniform(-spawn_radius, spawn_radius, size=(2,))
+    _find_non_contacting_height(physics, orientation, x_pos, y_pos)
+
+    # Initial ball state.
+    physics.named.data.qpos['ball_root'][:2] = self.random.uniform(
+        -spawn_radius, spawn_radius, size=(2,))
+    physics.named.data.qpos['ball_root'][2] = 2
+    physics.named.data.qvel['ball_root'][:2] = 5*self.random.randn(2)
+    super(Fetch, self).initialize_episode(physics)
+
+  def get_observation(self, physics):
+    """Returns an observation to the agent."""
+    obs = _common_observations(physics)
+    obs['ball_state'] = physics.ball_state()
+    obs['target_position'] = physics.target_position()
+    return obs
+
+  def get_reward(self, physics):
+    """Returns a reward to the agent."""
+
+    # Reward for moving close to the ball.
+    arena_radius = physics.named.model.geom_size['floor', 0] * np.sqrt(2)
+    workspace_radius = physics.named.model.site_size['workspace', 0]
+    ball_radius = physics.named.model.geom_size['ball', 0]
+    reach_reward = rewards.tolerance(
+        physics.self_to_ball_distance(),
+        bounds=(0, workspace_radius+ball_radius),
+        sigmoid='linear',
+        margin=arena_radius, value_at_margin=0)
+
+    # Reward for bringing the ball to the target.
+    target_radius = physics.named.model.site_size['target', 0]
+    fetch_reward = rewards.tolerance(
+        physics.ball_to_target_distance(),
+        bounds=(0, target_radius),
+        sigmoid='linear',
+        margin=arena_radius, value_at_margin=0)
+
+    reach_then_fetch = reach_reward * (0.5 + 0.5*fetch_reward)
+
+    return _upright_reward(physics) * reach_then_fetch
--- a/DPI/local_dm_control_suite/quadruped.xml
+++ b/DPI/local_dm_control_suite/quadruped.xml
@ -0,0 +1,329 @@
+<mujoco model="quadruped">
+
+  <include file="./common/skybox.xml"/>
+  <include file="./common/visual.xml"/>
+  <include file="./common/materials.xml"/>
+
+  <visual>
+    <rgba rangefinder="1 1 0.1 0.1"/>
+    <map znear=".005" zfar="20"/>
+  </visual>
+
+  <asset>
+    <hfield name="terrain" ncol="201" nrow="201" size="30 30 5 .1"/>
+  </asset>
+
+  <option timestep=".005"/>
+
+  <default>
+    <geom solimp=".9 .99 .003" solref=".01 1"/>
+    <default class="body">
+      <geom  type="capsule" size=".08" condim="1" material="self" density="500"/>
+      <joint type="hinge" damping="30" armature=".01"
+             limited="true" solimplimit="0 .99 .01"/>
+      <default class="hip">
+        <default class="yaw">
+          <joint axis="0 0 1" range="-50 50"/>
+        </default>
+        <default class="pitch">
+          <joint axis="0 1 0" range="-20 60"/>
+        </default>
+        <geom fromto="0 0 0 .3 0 .11"/>
+      </default>
+      <default class="knee">
+        <joint axis="0 1 0" range="-60 50"/>
+        <geom size=".065" fromto="0 0 0 .25 0 -.25"/>
+      </default>
+      <default class="ankle">
+        <joint axis="0 1 0" range="-45 55"/>
+        <geom size=".055" fromto="0 0 0 0 0 -.25"/>
+      </default>
+      <default class="toe">
+        <geom type="sphere" size=".08" material="effector" friction="1.5"/>
+        <site type="sphere" size=".084" material="site"  group="4"/>
+      </default>
+    </default>
+    <default class="rangefinder">
+      <site type="capsule" size=".005 .1" material="site" group="4"/>
+    </default>
+    <default class="wall">
+      <geom type="plane" material="decoration"/>
+    </default>
+
+    <default class="coupling">
+      <equality solimp="0.95 0.99 0.01" solref=".005 .5"/>
+    </default>
+
+    <general ctrllimited="true" gainprm="1000" biasprm="0 -1000" biastype="affine" dyntype="filter" dynprm=".1"/>
+    <default class="yaw_act">
+      <general ctrlrange="-1 1"/>
+    </default>
+    <default class="lift_act">
+      <general ctrlrange="-1 1.1"/>
+    </default>
+    <default class="extend_act">
+      <general ctrlrange="-.8 .8"/>
+    </default>
+  </default>
+
+  <asset>
+    <texture name="ball" builtin="checker" mark="cross" width="151" height="151"
+             rgb1="0.1 0.1 0.1" rgb2="0.9 0.9 0.9" markrgb="1 1 1"/>
+    <material name="ball" texture="ball" />
+  </asset>
+
+
+  <worldbody>
+    <geom name="floor" type="plane" size="15 15 .5" material="grid"/>
+    <geom name="wall_px" class="wall" pos="-15.7 0 .7" zaxis="1 0 1"  size="1 15 .5"/>
+    <geom name="wall_py" class="wall" pos="0 -15.7 .7" zaxis="0 1 1"  size="15 1 .5"/>
+    <geom name="wall_nx" class="wall" pos="15.7 0 .7" zaxis="-1 0 1"  size="1 15 .5"/>
+    <geom name="wall_ny" class="wall" pos="0 15.7 .7" zaxis="0 -1 1"  size="15 1 .5"/>
+    <site name="target" type="cylinder" size=".4 .06" pos="0 0 .05" material="target"/>
+
+    <geom name="terrain" type="hfield" hfield="terrain" rgba=".2 .3 .4 1" pos="0 0 -.01"/>
+
+    <camera name="global"  pos="-10 10 10" xyaxes="-1 -1 0 1 0 1" mode="trackcom"/>
+    <body name="torso" childclass="body" pos="0 0 .57">
+      <freejoint name="root"/>
+
+      <camera name="x"  pos="-1.7 0 1" xyaxes="0 -1 0 .75 0 1" mode="trackcom"/>
+      <camera name="y"  pos="0 4 2" xyaxes="-1 0 0 0 -.5 1" mode="trackcom"/>
+      <camera name="egocentric"  pos=".3 0 .11" xyaxes="0 -1 0 .4 0 1" fovy="60"/>
+      <light name="light" pos="0 0 4" mode="trackcom"/>
+
+      <geom name="eye_r" type="cylinder" size=".05"  fromto=".1 -.07 .12 .31 -.07 .08" mass="0"/>
+      <site name="pupil_r" type="sphere" size=".033"  pos=".3 -.07 .08" zaxis="1 0 0" material="eye"/>
+      <geom name="eye_l" type="cylinder" size=".05"  fromto=".1 .07 .12 .31 .07 .08" mass="0"/>
+      <site name="pupil_l" type="sphere" size=".033"  pos=".3 .07 .08" zaxis="1 0 0" material="eye"/>
+      <site name="workspace" type="sphere" size=".3 .3 .3"  material="site" pos=".8 0 -.2" group="3"/>
+
+      <site name="rf_00" class="rangefinder" fromto=".41 -.02  .11 .34 0 .115"/>
+      <site name="rf_01" class="rangefinder" fromto=".41 -.01  .11 .34 0 .115"/>
+      <site name="rf_02" class="rangefinder" fromto=".41   0   .11 .34 0 .115"/>
+      <site name="rf_03" class="rangefinder" fromto=".41  .01  .11 .34 0 .115"/>
+      <site name="rf_04" class="rangefinder" fromto=".41  .02  .11 .34 0 .115"/>
+      <site name="rf_10" class="rangefinder" fromto=".41 -.02  .1  .36 0 .11"/>
+      <site name="rf_11" class="rangefinder" fromto=".41 -.02  .1  .36 0 .11"/>
+      <site name="rf_12" class="rangefinder" fromto=".41   0   .1  .36 0 .11"/>
+      <site name="rf_13" class="rangefinder" fromto=".41  .01  .1  .36 0 .11"/>
+      <site name="rf_14" class="rangefinder" fromto=".41  .02  .1  .36 0 .11"/>
+      <site name="rf_20" class="rangefinder" fromto=".41 -.02  .09 .38 0 .105"/>
+      <site name="rf_21" class="rangefinder" fromto=".41 -.01  .09 .38 0 .105"/>
+      <site name="rf_22" class="rangefinder" fromto=".41   0   .09 .38 0 .105"/>
+      <site name="rf_23" class="rangefinder" fromto=".41  .01  .09 .38 0 .105"/>
+      <site name="rf_24" class="rangefinder" fromto=".41  .02  .09 .38 0 .105"/>
+      <site name="rf_30" class="rangefinder" fromto=".41 -.02  .08 .4  0 .1"/>
+      <site name="rf_31" class="rangefinder" fromto=".41 -.01  .08 .4  0 .1"/>
+      <site name="rf_32" class="rangefinder" fromto=".41   0   .08 .4  0 .1"/>
+      <site name="rf_33" class="rangefinder" fromto=".41  .01  .08 .4  0 .1"/>
+      <site name="rf_34" class="rangefinder" fromto=".41  .02  .08 .4  0 .1"/>
+
+      <geom name="torso" type="ellipsoid" size=".3 .27 .2" density="1000"/>
+      <site name="torso_touch" type="box" size=".26 .26 .26" rgba="0 0 1 0"/>
+      <site name="torso" size=".05" rgba="1 0 0 1" />
+
+      <body name="hip_front_left" pos=".2 .2 0" euler="0 0 45" childclass="hip">
+        <joint name="yaw_front_left" class="yaw"/>
+        <joint name="pitch_front_left" class="pitch"/>
+        <geom name="thigh_front_left"/>
+        <body name="knee_front_left" pos=".3 0 .11" childclass="knee">
+          <joint name="knee_front_left"/>
+          <geom name="shin_front_left"/>
+          <body name="ankle_front_left" pos=".25 0 -.25" childclass="ankle">
+            <joint name="ankle_front_left"/>
+            <geom name="foot_front_left"/>
+            <body name="toe_front_left" pos="0 0 -.3" childclass="toe">
+              <geom name="toe_front_left"/>
+              <site name="toe_front_left"/>
+            </body>
+          </body>
+        </body>
+      </body>
+
+      <body name="hip_front_right" pos=".2 -.2 0" euler="0 0 -45" childclass="hip">
+        <joint name="yaw_front_right" class="yaw"/>
+        <joint name="pitch_front_right" class="pitch"/>
+        <geom name="thigh_front_right"/>
+        <body name="knee_front_right" pos=".3 0 .11" childclass="knee">
+          <joint name="knee_front_right"/>
+          <geom name="shin_front_right"/>
+          <body name="ankle_front_right" pos=".25 0 -.25" childclass="ankle">
+            <joint name="ankle_front_right"/>
+            <geom name="foot_front_right"/>
+            <body name="toe_front_right" pos="0 0 -.3" childclass="toe">
+              <geom name="toe_front_right"/>
+              <site name="toe_front_right"/>
+            </body>
+          </body>
+        </body>
+      </body>
+
+      <body name="hip_back_right" pos="-.2 -.2 0" euler="0 0 -135" childclass="hip">
+        <joint name="yaw_back_right" class="yaw"/>
+        <joint name="pitch_back_right" class="pitch"/>
+        <geom name="thigh_back_right"/>
+        <body name="knee_back_right" pos=".3 0 .11" childclass="knee">
+          <joint name="knee_back_right"/>
+          <geom name="shin_back_right"/>
+          <body name="ankle_back_right" pos=".25 0 -.25" childclass="ankle">
+            <joint name="ankle_back_right"/>
+            <geom name="foot_back_right"/>
+            <body name="toe_back_right" pos="0 0 -.3" childclass="toe">
+              <geom name="toe_back_right"/>
+              <site name="toe_back_right"/>
+            </body>
+          </body>
+        </body>
+      </body>
+
+      <body name="hip_back_left" pos="-.2 .2 0" euler="0 0 135" childclass="hip">
+        <joint name="yaw_back_left" class="yaw"/>
+        <joint name="pitch_back_left" class="pitch"/>
+        <geom name="thigh_back_left"/>
+        <body name="knee_back_left" pos=".3 0 .11" childclass="knee">
+          <joint name="knee_back_left"/>
+          <geom name="shin_back_left"/>
+          <body name="ankle_back_left" pos=".25 0 -.25" childclass="ankle">
+            <joint name="ankle_back_left"/>
+            <geom name="foot_back_left"/>
+            <body name="toe_back_left" pos="0 0 -.3" childclass="toe">
+              <geom name="toe_back_left"/>
+              <site name="toe_back_left"/>
+            </body>
+          </body>
+        </body>
+      </body>
+    </body>
+
+    <body name="ball" pos="0 0 3">
+      <freejoint name="ball_root"/>
+      <geom name="ball" size=".15" material="ball" priority="1" condim="6" friction=".7 .005 .005"
+            solref="-10000 -30"/>
+      <light name="ball_light" pos="0 0 4" mode="trackcom"/>
+    </body>
+
+  </worldbody>
+
+  <tendon>
+    <fixed name="coupling_front_left">
+      <joint joint="pitch_front_left"      coef=".333"/>
+      <joint joint="knee_front_left"       coef=".333"/>
+      <joint joint="ankle_front_left"      coef=".333"/>
+    </fixed>
+    <fixed name="coupling_front_right">
+      <joint joint="pitch_front_right"      coef=".333"/>
+      <joint joint="knee_front_right"       coef=".333"/>
+      <joint joint="ankle_front_right"      coef=".333"/>
+    </fixed>
+    <fixed name="coupling_back_right">
+      <joint joint="pitch_back_right"      coef=".333"/>
+      <joint joint="knee_back_right"       coef=".333"/>
+      <joint joint="ankle_back_right"      coef=".333"/>
+    </fixed>
+    <fixed name="coupling_back_left">
+      <joint joint="pitch_back_left"      coef=".333"/>
+      <joint joint="knee_back_left"       coef=".333"/>
+      <joint joint="ankle_back_left"      coef=".333"/>
+    </fixed>
+
+    <fixed name="extend_front_left">
+      <joint joint="pitch_front_left"      coef=".25"/>
+      <joint joint="knee_front_left"       coef="-.5"/>
+      <joint joint="ankle_front_left"      coef=".25"/>
+    </fixed>
+    <fixed name="lift_front_left">
+      <joint joint="pitch_front_left"      coef=".5"/>
+      <joint joint="ankle_front_left"      coef="-.5"/>
+    </fixed>
+
+    <fixed name="extend_front_right">
+      <joint joint="pitch_front_right"     coef=".25"/>
+      <joint joint="knee_front_right"      coef="-.5"/>
+      <joint joint="ankle_front_right"     coef=".25"/>
+    </fixed>
+    <fixed name="lift_front_right">
+      <joint joint="pitch_front_right"     coef=".5"/>
+      <joint joint="ankle_front_right"     coef="-.5"/>
+    </fixed>
+
+    <fixed name="extend_back_right">
+      <joint joint="pitch_back_right"     coef=".25"/>
+      <joint joint="knee_back_right"      coef="-.5"/>
+      <joint joint="ankle_back_right"     coef=".25"/>
+    </fixed>
+    <fixed name="lift_back_right">
+      <joint joint="pitch_back_right"     coef=".5"/>
+      <joint joint="ankle_back_right"     coef="-.5"/>
+    </fixed>
+
+    <fixed name="extend_back_left">
+      <joint joint="pitch_back_left"      coef=".25"/>
+      <joint joint="knee_back_left"       coef="-.5"/>
+      <joint joint="ankle_back_left"      coef=".25"/>
+    </fixed>
+    <fixed name="lift_back_left">
+      <joint joint="pitch_back_left"     coef=".5"/>
+      <joint joint="ankle_back_left"     coef="-.5"/>
+    </fixed>
+  </tendon>
+
+  <equality>
+    <tendon name="coupling_front_left" tendon1="coupling_front_left" class="coupling"/>
+    <tendon name="coupling_front_right" tendon1="coupling_front_right" class="coupling"/>
+    <tendon name="coupling_back_right" tendon1="coupling_back_right" class="coupling"/>
+    <tendon name="coupling_back_left" tendon1="coupling_back_left" class="coupling"/>
+  </equality>
+
+  <actuator>
+    <general name="yaw_front_left" class="yaw_act" joint="yaw_front_left"/>
+    <general name="lift_front_left" class="lift_act" tendon="lift_front_left"/>
+    <general name="extend_front_left" class="extend_act" tendon="extend_front_left"/>
+    <general name="yaw_front_right" class="yaw_act" joint="yaw_front_right"/>
+    <general name="lift_front_right" class="lift_act" tendon="lift_front_right"/>
+    <general name="extend_front_right" class="extend_act" tendon="extend_front_right"/>
+    <general name="yaw_back_right" class="yaw_act" joint="yaw_back_right"/>
+    <general name="lift_back_right" class="lift_act" tendon="lift_back_right"/>
+    <general name="extend_back_right" class="extend_act" tendon="extend_back_right"/>
+    <general name="yaw_back_left" class="yaw_act" joint="yaw_back_left"/>
+    <general name="lift_back_left" class="lift_act" tendon="lift_back_left"/>
+    <general name="extend_back_left" class="extend_act" tendon="extend_back_left"/>
+  </actuator>
+
+  <sensor>
+    <accelerometer name="imu_accel" site="torso"/>
+    <gyro name="imu_gyro" site="torso"/>
+    <velocimeter name="velocimeter" site="torso"/>
+    <force name="force_toe_front_left" site="toe_front_left"/>
+    <force name="force_toe_front_right" site="toe_front_right"/>
+    <force name="force_toe_back_right" site="toe_back_right"/>
+    <force name="force_toe_back_left" site="toe_back_left"/>
+    <torque name="torque_toe_front_left" site="toe_front_left"/>
+    <torque name="torque_toe_front_right" site="toe_front_right"/>
+    <torque name="torque_toe_back_right" site="toe_back_right"/>
+    <torque name="torque_toe_back_left" site="toe_back_left"/>
+    <subtreecom name="center_of_mass" body="torso"/>
+    <rangefinder name="rf_00" site="rf_00"/>
+    <rangefinder name="rf_01" site="rf_01"/>
+    <rangefinder name="rf_02" site="rf_02"/>
+    <rangefinder name="rf_03" site="rf_03"/>
+    <rangefinder name="rf_04" site="rf_04"/>
+    <rangefinder name="rf_10" site="rf_10"/>
+    <rangefinder name="rf_11" site="rf_11"/>
+    <rangefinder name="rf_12" site="rf_12"/>
+    <rangefinder name="rf_13" site="rf_13"/>
+    <rangefinder name="rf_14" site="rf_14"/>
+    <rangefinder name="rf_20" site="rf_20"/>
+    <rangefinder name="rf_21" site="rf_21"/>
+    <rangefinder name="rf_22" site="rf_22"/>
+    <rangefinder name="rf_23" site="rf_23"/>
+    <rangefinder name="rf_24" site="rf_24"/>
+    <rangefinder name="rf_30" site="rf_30"/>
+    <rangefinder name="rf_31" site="rf_31"/>
+    <rangefinder name="rf_32" site="rf_32"/>
+    <rangefinder name="rf_33" site="rf_33"/>
+    <rangefinder name="rf_34" site="rf_34"/>
+  </sensor>
+
+</mujoco>
+
--- a/DPI/local_dm_control_suite/reacher.py
+++ b/DPI/local_dm_control_suite/reacher.py
@ -0,0 +1,116 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Reacher domain."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+from dm_control import mujoco
+from dm_control.rl import control
+from local_dm_control_suite import base
+from local_dm_control_suite import common
+from dm_control.suite.utils import randomizers
+from dm_control.utils import containers
+from dm_control.utils import rewards
+import numpy as np
+
+SUITE = containers.TaggedTasks()
+_DEFAULT_TIME_LIMIT = 20
+_BIG_TARGET = .05
+_SMALL_TARGET = .015
+
+
+def get_model_and_assets():
+  """Returns a tuple containing the model XML string and a dict of assets."""
+  return common.read_model('reacher.xml'), common.ASSETS
+
+
+@SUITE.add('benchmarking', 'easy')
+def easy(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Returns reacher with sparse reward with 5e-2 tol and randomized target."""
+  physics = Physics.from_xml_string(*get_model_and_assets())
+  task = Reacher(target_size=_BIG_TARGET, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, **environment_kwargs)
+
+
+@SUITE.add('benchmarking')
+def hard(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Returns reacher with sparse reward with 1e-2 tol and randomized target."""
+  physics = Physics.from_xml_string(*get_model_and_assets())
+  task = Reacher(target_size=_SMALL_TARGET, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, **environment_kwargs)
+
+
+class Physics(mujoco.Physics):
+  """Physics simulation with additional features for the Reacher domain."""
+
+  def finger_to_target(self):
+    """Returns the vector from target to finger in global coordinates."""
+    return (self.named.data.geom_xpos['target', :2] -
+            self.named.data.geom_xpos['finger', :2])
+
+  def finger_to_target_dist(self):
+    """Returns the signed distance between the finger and target surface."""
+    return np.linalg.norm(self.finger_to_target())
+
+
+class Reacher(base.Task):
+  """A reacher `Task` to reach the target."""
+
+  def __init__(self, target_size, random=None):
+    """Initialize an instance of `Reacher`.
+
+    Args:
+      target_size: A `float`, tolerance to determine whether finger reached the
+          target.
+      random: Optional, either a `numpy.random.RandomState` instance, an
+        integer seed for creating a new `RandomState`, or None to select a seed
+        automatically (default).
+    """
+    self._target_size = target_size
+    super(Reacher, self).__init__(random=random)
+
+  def initialize_episode(self, physics):
+    """Sets the state of the environment at the start of each episode."""
+    physics.named.model.geom_size['target', 0] = self._target_size
+    randomizers.randomize_limited_and_rotational_joints(physics, self.random)
+
+    # Randomize target position
+    angle = self.random.uniform(0, 2 * np.pi)
+    radius = self.random.uniform(.05, .20)
+    physics.named.model.geom_pos['target', 'x'] = radius * np.sin(angle)
+    physics.named.model.geom_pos['target', 'y'] = radius * np.cos(angle)
+
+    super(Reacher, self).initialize_episode(physics)
+
+  def get_observation(self, physics):
+    """Returns an observation of the state and the target position."""
+    obs = collections.OrderedDict()
+    obs['position'] = physics.position()
+    obs['to_target'] = physics.finger_to_target()
+    obs['velocity'] = physics.velocity()
+    return obs
+
+  def get_reward(self, physics):
+    radii = physics.named.model.geom_size[['target', 'finger'], 0].sum()
+    return rewards.tolerance(physics.finger_to_target_dist(), (0, radii))
--- a/DPI/local_dm_control_suite/reacher.xml
+++ b/DPI/local_dm_control_suite/reacher.xml
@ -0,0 +1,47 @@
+<mujoco model="two-link planar reacher">
+  <include file="./common/skybox.xml"/>
+  <include file="./common/visual.xml"/>
+  <include file="./common/materials.xml"/>
+
+  <option timestep="0.02">
+    <flag contact="disable"/>
+  </option>
+
+  <default>
+    <joint type="hinge" axis="0 0 1" damping="0.01"/>
+    <motor gear=".05" ctrlrange="-1 1" ctrllimited="true"/>
+  </default>
+
+  <worldbody>
+    <light name="light" pos="0 0 1"/>
+    <camera name="fixed" pos="0 0 .75" quat="1 0 0 0"/>
+    <!-- Arena -->
+    <geom name="ground" type="plane" pos="0 0 0" size=".3 .3 10" material="grid"/>
+    <geom name="wall_x" type="plane" pos="-.3 0 .02" zaxis="1 0 0"  size=".02 .3 .02" material="decoration"/>
+    <geom name="wall_y" type="plane" pos="0 -.3 .02" zaxis="0 1 0"  size=".3 .02 .02" material="decoration"/>
+    <geom name="wall_neg_x" type="plane" pos=".3 0 .02" zaxis="-1 0 0"  size=".02 .3 .02" material="decoration"/>
+    <geom name="wall_neg_y" type="plane" pos="0 .3 .02" zaxis="0 -1 0"  size=".3 .02 .02" material="decoration"/>
+
+    <!-- Arm -->
+    <geom name="root" type="cylinder" fromto="0 0 0 0 0 0.02" size=".011" material="decoration"/>
+    <body name="arm" pos="0 0 .01">
+      <geom name="arm" type="capsule" fromto="0 0 0 0.12 0 0" size=".01" material="self"/>
+      <joint name="shoulder"/>
+      <body name="hand" pos=".12 0 0">
+        <geom name="hand" type="capsule" fromto="0 0 0 0.1 0 0" size=".01" material="self"/>
+        <joint name="wrist" limited="true" range="-160 160"/>
+        <body name="finger" pos=".12 0 0">
+          <camera name="hand" pos="0 0 .2" mode="track"/>
+          <geom name="finger" type="sphere" size=".01" material="effector"/>
+        </body>
+      </body>
+    </body>
+    <!-- Target -->
+    <geom name="target" pos="0 0 .01" material="target" type="sphere" size=".05"/>
+  </worldbody>
+
+  <actuator>
+    <motor name="shoulder" joint="shoulder"/>
+    <motor name="wrist" joint="wrist"/>
+  </actuator>
+</mujoco>
--- a/DPI/local_dm_control_suite/stacker.py
+++ b/DPI/local_dm_control_suite/stacker.py
@ -0,0 +1,208 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Planar Stacker domain."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+from dm_control import mujoco
+from dm_control.rl import control
+from local_dm_control_suite import base
+from local_dm_control_suite import common
+from dm_control.utils import containers
+from dm_control.utils import rewards
+from dm_control.utils import xml_tools
+
+from lxml import etree
+import numpy as np
+
+
+_CLOSE = .01    # (Meters) Distance below which a thing is considered close.
+_CONTROL_TIMESTEP = .01  # (Seconds)
+_TIME_LIMIT = 10  # (Seconds)
+_ARM_JOINTS = ['arm_root', 'arm_shoulder', 'arm_elbow', 'arm_wrist',
+               'finger', 'fingertip', 'thumb', 'thumbtip']
+
+SUITE = containers.TaggedTasks()
+
+
+def make_model(n_boxes):
+  """Returns a tuple containing the model XML string and a dict of assets."""
+  xml_string = common.read_model('stacker.xml')
+  parser = etree.XMLParser(remove_blank_text=True)
+  mjcf = etree.XML(xml_string, parser)
+
+  # Remove unused boxes
+  for b in range(n_boxes, 4):
+    box = xml_tools.find_element(mjcf, 'body', 'box' + str(b))
+    box.getparent().remove(box)
+
+  return etree.tostring(mjcf, pretty_print=True), common.ASSETS
+
+
+@SUITE.add('hard')
+def stack_2(fully_observable=True, time_limit=_TIME_LIMIT, random=None,
+            environment_kwargs=None):
+  """Returns stacker task with 2 boxes."""
+  n_boxes = 2
+  physics = Physics.from_xml_string(*make_model(n_boxes=n_boxes))
+  task = Stack(n_boxes=n_boxes,
+               fully_observable=fully_observable,
+               random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit,
+      **environment_kwargs)
+
+
+@SUITE.add('hard')
+def stack_4(fully_observable=True, time_limit=_TIME_LIMIT, random=None,
+            environment_kwargs=None):
+  """Returns stacker task with 4 boxes."""
+  n_boxes = 4
+  physics = Physics.from_xml_string(*make_model(n_boxes=n_boxes))
+  task = Stack(n_boxes=n_boxes,
+               fully_observable=fully_observable,
+               random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit,
+      **environment_kwargs)
+
+
+class Physics(mujoco.Physics):
+  """Physics with additional features for the Planar Manipulator domain."""
+
+  def bounded_joint_pos(self, joint_names):
+    """Returns joint positions as (sin, cos) values."""
+    joint_pos = self.named.data.qpos[joint_names]
+    return np.vstack([np.sin(joint_pos), np.cos(joint_pos)]).T
+
+  def joint_vel(self, joint_names):
+    """Returns joint velocities."""
+    return self.named.data.qvel[joint_names]
+
+  def body_2d_pose(self, body_names, orientation=True):
+    """Returns positions and/or orientations of bodies."""
+    if not isinstance(body_names, str):
+      body_names = np.array(body_names).reshape(-1, 1)  # Broadcast indices.
+    pos = self.named.data.xpos[body_names, ['x', 'z']]
+    if orientation:
+      ori = self.named.data.xquat[body_names, ['qw', 'qy']]
+      return np.hstack([pos, ori])
+    else:
+      return pos
+
+  def touch(self):
+    return np.log1p(self.data.sensordata)
+
+  def site_distance(self, site1, site2):
+    site1_to_site2 = np.diff(self.named.data.site_xpos[[site2, site1]], axis=0)
+    return np.linalg.norm(site1_to_site2)
+
+
+class Stack(base.Task):
+  """A Stack `Task`: stack the boxes."""
+
+  def __init__(self, n_boxes, fully_observable, random=None):
+    """Initialize an instance of the `Stack` task.
+
+    Args:
+      n_boxes: An `int`, number of boxes to stack.
+      fully_observable: A `bool`, whether the observation should contain the
+        positions and velocities of the boxes and the location of the target.
+      random: Optional, either a `numpy.random.RandomState` instance, an
+        integer seed for creating a new `RandomState`, or None to select a seed
+        automatically (default).
+    """
+    self._n_boxes = n_boxes
+    self._box_names = ['box' + str(b) for b in range(n_boxes)]
+    self._box_joint_names = []
+    for name in self._box_names:
+      for dim in 'xyz':
+        self._box_joint_names.append('_'.join([name, dim]))
+    self._fully_observable = fully_observable
+    super(Stack, self).__init__(random=random)
+
+  def initialize_episode(self, physics):
+    """Sets the state of the environment at the start of each episode."""
+    # Local aliases
+    randint = self.random.randint
+    uniform = self.random.uniform
+    model = physics.named.model
+    data = physics.named.data
+
+    # Find a collision-free random initial configuration.
+    penetrating = True
+    while penetrating:
+
+      # Randomise angles of arm joints.
+      is_limited = model.jnt_limited[_ARM_JOINTS].astype(np.bool)
+      joint_range = model.jnt_range[_ARM_JOINTS]
+      lower_limits = np.where(is_limited, joint_range[:, 0], -np.pi)
+      upper_limits = np.where(is_limited, joint_range[:, 1], np.pi)
+      angles = uniform(lower_limits, upper_limits)
+      data.qpos[_ARM_JOINTS] = angles
+
+      # Symmetrize hand.
+      data.qpos['finger'] = data.qpos['thumb']
+
+      # Randomise target location.
+      target_height = 2*randint(self._n_boxes) + 1
+      box_size = model.geom_size['target', 0]
+      model.body_pos['target', 'z'] = box_size * target_height
+      model.body_pos['target', 'x'] = uniform(-.37, .37)
+
+      # Randomise box locations.
+      for name in self._box_names:
+        data.qpos[name + '_x'] = uniform(.1, .3)
+        data.qpos[name + '_z'] = uniform(0, .7)
+        data.qpos[name + '_y'] = uniform(0, 2*np.pi)
+
+      # Check for collisions.
+      physics.after_reset()
+      penetrating = physics.data.ncon > 0
+
+    super(Stack, self).initialize_episode(physics)
+
+  def get_observation(self, physics):
+    """Returns either features or only sensors (to be used with pixels)."""
+    obs = collections.OrderedDict()
+    obs['arm_pos'] = physics.bounded_joint_pos(_ARM_JOINTS)
+    obs['arm_vel'] = physics.joint_vel(_ARM_JOINTS)
+    obs['touch'] = physics.touch()
+    if self._fully_observable:
+      obs['hand_pos'] = physics.body_2d_pose('hand')
+      obs['box_pos'] = physics.body_2d_pose(self._box_names)
+      obs['box_vel'] = physics.joint_vel(self._box_joint_names)
+      obs['target_pos'] = physics.body_2d_pose('target', orientation=False)
+    return obs
+
+  def get_reward(self, physics):
+    """Returns a reward to the agent."""
+    box_size = physics.named.model.geom_size['target', 0]
+    min_box_to_target_distance = min(physics.site_distance(name, 'target')
+                                     for name in self._box_names)
+    box_is_close = rewards.tolerance(min_box_to_target_distance,
+                                     margin=2*box_size)
+    hand_to_target_distance = physics.site_distance('grasp', 'target')
+    hand_is_far = rewards.tolerance(hand_to_target_distance,
+                                    bounds=(.1, float('inf')),
+                                    margin=_CLOSE)
+    return box_is_close * hand_is_far
--- a/DPI/local_dm_control_suite/stacker.xml
+++ b/DPI/local_dm_control_suite/stacker.xml
@ -0,0 +1,193 @@
+<mujoco model="planar stacker">
+
+  <include file="./common/visual.xml"/>
+  <include file="./common/skybox.xml"/>
+  <include file="./common/materials_white_floor.xml"/>
+  <asset>
+    <texture name="background" builtin="flat" type="2d" mark="random" markrgb="1 1 1" width="800" height="800" rgb1=".2 .3 .4"/>
+    <material name="background" texture="background" texrepeat="1 1" texuniform="true"/>
+  </asset>
+
+  <visual>
+    <map shadowclip=".5"/>
+    <quality shadowsize="2048"/>
+  </visual>>
+
+  <option timestep="0.001" cone="elliptic"/>
+
+  <default>
+    <geom friction=".7" solimp="0.9 0.97 0.001" solref=".01 1"/>
+    <joint solimplimit="0 0.99 0.01" solreflimit=".005 1"/>
+    <general ctrllimited="true"/>
+    <tendon width="0.01"/>
+    <site size=".003 .003 .003" material="site" group="3"/>
+
+    <default class="arm">
+      <geom type="capsule" material="self" density="500"/>
+      <joint type="hinge" pos="0 0 0" axis="0 -1 0" limited="true"/>
+      <default class="hand">
+        <joint damping=".5" range="-10 60"/>
+        <geom size=".008"/>
+        <site  type="box" size=".018 .005 .005" pos=".022 0 -.002" euler="0 15 0" group="4"/>
+        <default class="fingertip">
+          <geom type="sphere" size=".008" material="effector"/>
+          <joint damping=".01" stiffness=".01" range="-40 20"/>
+          <site  size=".012 .005 .008" pos=".003 0 .003" group="4" euler="0 0 0"/>
+        </default>
+      </default>
+    </default>
+
+    <default class="object">
+      <geom material="self"/>
+    </default>
+
+    <default class="task">
+      <site rgba="0 0 0 0"/>
+    </default>
+
+    <default class="obstacle">
+      <geom material="decoration" friction="0"/>
+    </default>
+
+    <default class="ghost">
+      <geom material="target" contype="0" conaffinity="0"/>
+    </default>
+  </default>
+
+  <worldbody>
+    <!-- Arena -->
+    <light name="light" directional="true" diffuse=".6 .6 .6" pos="0 0 1" specular=".3 .3 .3"/>
+    <geom name="floor" type="plane" pos="0 0 0" size=".4 .2 10" material="grid"/>
+    <geom name="wall1" type="plane" pos="-.682843 0 .282843" size=".4 .2 10" material="grid" zaxis="1 0 1"/>
+    <geom name="wall2" type="plane" pos=".682843 0 .282843" size=".4 .2 10" material="grid" zaxis="-1 0 1"/>
+    <geom name="background" type="plane" pos="0 .2 .5" size="1 .5 10" material="background" zaxis="0 -1 0"/>
+    <camera name="fixed" pos="0 -16 .4" xyaxes="1 0 0 0 0 1" fovy="4"/>
+
+    <!-- Arm -->
+    <geom name="arm_root" type="cylinder" fromto="0 -.022 .4 0 .022 .4" size=".024"
+          material="decoration" contype="0" conaffinity="0"/>
+    <body name="upper_arm" pos="0 0 .4" childclass="arm">
+      <joint name="arm_root" damping="2" limited="false"/>
+      <geom  name="upper_arm"  size=".02" fromto="0 0 0 0 0 .18"/>
+      <body  name="middle_arm" pos="0 0 .18" childclass="arm">
+        <joint name="arm_shoulder" damping="1.5" range="-160 160"/>
+        <geom  name="middle_arm"  size=".017" fromto="0 0 0 0 0 .15"/>
+        <body  name="lower_arm" pos="0 0 .15">
+          <joint name="arm_elbow" damping="1" range="-160 160"/>
+          <geom  name="lower_arm" size=".014" fromto="0 0 0 0 0 .12"/>
+          <body  name="hand" pos="0 0 .12">
+            <joint name="arm_wrist" damping=".5" range="-140 140" />
+            <geom  name="hand" size=".011" fromto="0 0 0 0 0 .03"/>
+            <geom  name="palm1"  fromto="0 0 .03  .03 0 .045" class="hand"/>
+            <geom  name="palm2"  fromto="0 0 .03 -.03 0 .045" class="hand"/>
+            <site  name="grasp" pos="0 0 .065"/>
+            <body  name="pinch site" pos="0 0 .090">
+              <site  name="pinch"/>
+              <inertial pos="0 0 0" mass="1e-6" diaginertia="1e-12 1e-12 1e-12"/>
+              <camera name="hand" pos="0 -.3 0" xyaxes="1 0 0 0 0 1" mode="track"/>
+            </body>
+            <site  name="palm_touch" type="box" group="4" size=".025 .005 .008" pos="0 0 .043"/>
+
+            <body name="thumb" pos=".03 0 .045" euler="0 -90 0" childclass="hand">
+              <joint name="thumb"/>
+              <geom  name="thumb1"  fromto="0 0 0 .02 0 -.01" size=".007"/>
+              <geom  name="thumb2"  fromto=".02 0 -.01 .04 0 -.01" size=".007"/>
+              <site  name="thumb_touch" group="4"/>
+              <body  name="thumbtip" pos=".05 0 -.01" childclass="fingertip">
+                <joint name="thumbtip"/>
+                <geom  name="thumbtip1" pos="-.003 0 0" />
+                <geom  name="thumbtip2" pos=".003 0 0" />
+                <site  name="thumbtip_touch" group="4"/>
+              </body>
+            </body>
+
+            <body name="finger" pos="-.03 0 .045" euler="0 90 180" childclass="hand">
+              <joint name="finger"/>
+              <geom  name="finger1"  fromto="0 0 0 .02 0 -.01" size=".007" />
+              <geom  name="finger2"  fromto=".02 0 -.01 .04 0 -.01" size=".007"/>
+              <site  name="finger_touch"/>
+              <body  name="fingertip" pos=".05 0 -.01" childclass="fingertip">
+                <joint name="fingertip"/>
+                <geom  name="fingertip1" pos="-.003 0 0" />
+                <geom  name="fingertip2" pos=".003 0 0" />
+                <site  name="fingertip_touch"/>
+              </body>
+            </body>
+          </body>
+        </body>
+      </body>
+    </body>
+
+    <!-- props -->
+    <body name="box0" pos=".5 0 .4" childclass="object">
+      <joint name="box0_x" type="slide" axis="1 0 0" ref=".5"/>
+      <joint name="box0_z" type="slide" axis="0 0 1" ref=".4"/>
+      <joint name="box0_y" type="hinge" axis="0 1 0"/>
+      <geom  name="box0" type="box" size=".022 .022 .022" />
+      <site  name="box0" type="sphere"/>
+    </body>
+
+    <body name="box1" pos=".4 0 .4" childclass="object">
+      <joint name="box1_x" type="slide" axis="1 0 0" ref=".4"/>
+      <joint name="box1_z" type="slide" axis="0 0 1" ref=".4"/>
+      <joint name="box1_y" type="hinge" axis="0 1 0"/>
+      <geom  name="box1" type="box" size=".022 .022 .022" />
+      <site  name="box1" type="sphere"/>
+    </body>
+
+    <body name="box2" pos=".3 0 .4" childclass="object">
+      <joint name="box2_x" type="slide" axis="1 0 0" ref=".3"/>
+      <joint name="box2_z" type="slide" axis="0 0 1" ref=".4"/>
+      <joint name="box2_y" type="hinge" axis="0 1 0"/>
+      <geom  name="box2" type="box" size=".022 .022 .022" />
+      <site  name="box2" type="sphere"/>
+    </body>
+
+    <body name="box3" pos=".2 0 .4" childclass="object">
+      <joint name="box3_x" type="slide" axis="1 0 0" ref=".2"/>
+      <joint name="box3_z" type="slide" axis="0 0 1" ref=".4"/>
+      <joint name="box3_y" type="hinge" axis="0 1 0"/>
+      <geom  name="box3" type="box" size=".022 .022 .022" />
+      <site  name="box3" type="sphere"/>
+    </body>
+
+
+    <!-- targets -->
+    <body name="target" pos="0 .001 .022" childclass="ghost">
+      <geom  name="target" type="box" size=".022 .022 .022" />
+      <site  name="target" type="sphere"/>
+    </body>
+  </worldbody>
+
+  <tendon>
+    <fixed name="grasp">
+      <joint joint="thumb"  coef=".5"/>
+      <joint joint="finger" coef=".5"/>
+    </fixed>
+    <fixed name="coupling">
+      <joint joint="thumb"  coef="-.5"/>
+      <joint joint="finger" coef=".5"/>
+    </fixed>
+  </tendon>
+
+  <equality>
+    <tendon name="coupling" tendon1="coupling" solimp="0.95 0.99 0.001" solref=".005 .5"/>
+  </equality>
+
+  <sensor>
+    <touch name="palm_touch" site="palm_touch"/>
+    <touch name="finger_touch" site="finger_touch"/>
+    <touch name="thumb_touch" site="thumb_touch"/>
+    <touch name="fingertip_touch" site="fingertip_touch"/>
+    <touch name="thumbtip_touch" site="thumbtip_touch"/>
+  </sensor>
+
+  <actuator>
+    <motor name="root"     joint="arm_root"     ctrlrange="-1 1"  gear="12"/>
+    <motor name="shoulder" joint="arm_shoulder" ctrlrange="-1 1"  gear="8"/>
+    <motor name="elbow"    joint="arm_elbow"    ctrlrange="-1 1"  gear="4"/>
+    <motor name="wrist"    joint="arm_wrist"    ctrlrange="-1 1"  gear="2"/>
+    <motor name="grasp"    tendon="grasp"       ctrlrange="-1 1"  gear="2"/>
+  </actuator>
+
+</mujoco>
--- a/DPI/local_dm_control_suite/swimmer.py
+++ b/DPI/local_dm_control_suite/swimmer.py
@ -0,0 +1,215 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Procedurally generated Swimmer domain."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+from dm_control import mujoco
+from dm_control.rl import control
+from local_dm_control_suite import base
+from local_dm_control_suite import common
+from dm_control.suite.utils import randomizers
+from dm_control.utils import containers
+from dm_control.utils import rewards
+from lxml import etree
+import numpy as np
+from six.moves import range
+
+_DEFAULT_TIME_LIMIT = 30
+_CONTROL_TIMESTEP = .03  # (Seconds)
+
+SUITE = containers.TaggedTasks()
+
+
+def get_model_and_assets(n_joints):
+  """Returns a tuple containing the model XML string and a dict of assets.
+
+  Args:
+    n_joints: An integer specifying the number of joints in the swimmer.
+
+  Returns:
+    A tuple `(model_xml_string, assets)`, where `assets` is a dict consisting of
+    `{filename: contents_string}` pairs.
+  """
+  return _make_model(n_joints), common.ASSETS
+
+
+@SUITE.add('benchmarking')
+def swimmer6(time_limit=_DEFAULT_TIME_LIMIT, random=None,
+             environment_kwargs=None):
+  """Returns a 6-link swimmer."""
+  return _make_swimmer(6, time_limit, random=random,
+                       environment_kwargs=environment_kwargs)
+
+
+@SUITE.add('benchmarking')
+def swimmer15(time_limit=_DEFAULT_TIME_LIMIT, random=None,
+              environment_kwargs=None):
+  """Returns a 15-link swimmer."""
+  return _make_swimmer(15, time_limit, random=random,
+                       environment_kwargs=environment_kwargs)
+
+
+def swimmer(n_links=3, time_limit=_DEFAULT_TIME_LIMIT,
+            random=None, environment_kwargs=None):
+  """Returns a swimmer with n links."""
+  return _make_swimmer(n_links, time_limit, random=random,
+                       environment_kwargs=environment_kwargs)
+
+
+def _make_swimmer(n_joints, time_limit=_DEFAULT_TIME_LIMIT, random=None,
+                  environment_kwargs=None):
+  """Returns a swimmer control environment."""
+  model_string, assets = get_model_and_assets(n_joints)
+  physics = Physics.from_xml_string(model_string, assets=assets)
+  task = Swimmer(random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
+      **environment_kwargs)
+
+
+def _make_model(n_bodies):
+  """Generates an xml string defining a swimmer with `n_bodies` bodies."""
+  if n_bodies < 3:
+    raise ValueError('At least 3 bodies required. Received {}'.format(n_bodies))
+  mjcf = etree.fromstring(common.read_model('swimmer.xml'))
+  head_body = mjcf.find('./worldbody/body')
+  actuator = etree.SubElement(mjcf, 'actuator')
+  sensor = etree.SubElement(mjcf, 'sensor')
+
+  parent = head_body
+  for body_index in range(n_bodies - 1):
+    site_name = 'site_{}'.format(body_index)
+    child = _make_body(body_index=body_index)
+    child.append(etree.Element('site', name=site_name))
+    joint_name = 'joint_{}'.format(body_index)
+    joint_limit = 360.0/n_bodies
+    joint_range = '{} {}'.format(-joint_limit, joint_limit)
+    child.append(etree.Element('joint', {'name': joint_name,
+                                         'range': joint_range}))
+    motor_name = 'motor_{}'.format(body_index)
+    actuator.append(etree.Element('motor', name=motor_name, joint=joint_name))
+    velocimeter_name = 'velocimeter_{}'.format(body_index)
+    sensor.append(etree.Element('velocimeter', name=velocimeter_name,
+                                site=site_name))
+    gyro_name = 'gyro_{}'.format(body_index)
+    sensor.append(etree.Element('gyro', name=gyro_name, site=site_name))
+    parent.append(child)
+    parent = child
+
+  # Move tracking cameras further away from the swimmer according to its length.
+  cameras = mjcf.findall('./worldbody/body/camera')
+  scale = n_bodies / 6.0
+  for cam in cameras:
+    if cam.get('mode') == 'trackcom':
+      old_pos = cam.get('pos').split(' ')
+      new_pos = ' '.join([str(float(dim) * scale) for dim in old_pos])
+      cam.set('pos', new_pos)
+
+  return etree.tostring(mjcf, pretty_print=True)
+
+
+def _make_body(body_index):
+  """Generates an xml string defining a single physical body."""
+  body_name = 'segment_{}'.format(body_index)
+  visual_name = 'visual_{}'.format(body_index)
+  inertial_name = 'inertial_{}'.format(body_index)
+  body = etree.Element('body', name=body_name)
+  body.set('pos', '0 .1 0')
+  etree.SubElement(body, 'geom', {'class': 'visual', 'name': visual_name})
+  etree.SubElement(body, 'geom', {'class': 'inertial', 'name': inertial_name})
+  return body
+
+
+class Physics(mujoco.Physics):
+  """Physics simulation with additional features for the swimmer domain."""
+
+  def nose_to_target(self):
+    """Returns a vector from nose to target in local coordinate of the head."""
+    nose_to_target = (self.named.data.geom_xpos['target'] -
+                      self.named.data.geom_xpos['nose'])
+    head_orientation = self.named.data.xmat['head'].reshape(3, 3)
+    return nose_to_target.dot(head_orientation)[:2]
+
+  def nose_to_target_dist(self):
+    """Returns the distance from the nose to the target."""
+    return np.linalg.norm(self.nose_to_target())
+
+  def body_velocities(self):
+    """Returns local body velocities: x,y linear, z rotational."""
+    xvel_local = self.data.sensordata[12:].reshape((-1, 6))
+    vx_vy_wz = [0, 1, 5]  # Indices for linear x,y vels and rotational z vel.
+    return xvel_local[:, vx_vy_wz].ravel()
+
+  def joints(self):
+    """Returns all internal joint angles (excluding root joints)."""
+    return self.data.qpos[3:].copy()
+
+
+class Swimmer(base.Task):
+  """A swimmer `Task` to reach the target or just swim."""
+
+  def __init__(self, random=None):
+    """Initializes an instance of `Swimmer`.
+
+    Args:
+      random: Optional, either a `numpy.random.RandomState` instance, an
+        integer seed for creating a new `RandomState`, or None to select a seed
+        automatically (default).
+    """
+    super(Swimmer, self).__init__(random=random)
+
+  def initialize_episode(self, physics):
+    """Sets the state of the environment at the start of each episode.
+
+    Initializes the swimmer orientation to [-pi, pi) and the relative joint
+    angle of each joint uniformly within its range.
+
+    Args:
+      physics: An instance of `Physics`.
+    """
+    # Random joint angles:
+    randomizers.randomize_limited_and_rotational_joints(physics, self.random)
+    # Random target position.
+    close_target = self.random.rand() < .2  # Probability of a close target.
+    target_box = .3 if close_target else 2
+    xpos, ypos = self.random.uniform(-target_box, target_box, size=2)
+    physics.named.model.geom_pos['target', 'x'] = xpos
+    physics.named.model.geom_pos['target', 'y'] = ypos
+    physics.named.model.light_pos['target_light', 'x'] = xpos
+    physics.named.model.light_pos['target_light', 'y'] = ypos
+
+    super(Swimmer, self).initialize_episode(physics)
+
+  def get_observation(self, physics):
+    """Returns an observation of joint angles, body velocities and target."""
+    obs = collections.OrderedDict()
+    obs['joints'] = physics.joints()
+    obs['to_target'] = physics.nose_to_target()
+    obs['body_velocities'] = physics.body_velocities()
+    return obs
+
+  def get_reward(self, physics):
+    """Returns a smooth reward."""
+    target_size = physics.named.model.geom_size['target', 0]
+    return rewards.tolerance(physics.nose_to_target_dist(),
+                             bounds=(0, target_size),
+                             margin=5*target_size,
+                             sigmoid='long_tail')
--- a/DPI/local_dm_control_suite/swimmer.xml
+++ b/DPI/local_dm_control_suite/swimmer.xml
@ -0,0 +1,57 @@
+<mujoco model="swimmer">
+  <include file="./common/visual.xml"/>
+  <include file="./common/skybox.xml"/>
+  <include file="./common/materials.xml"/>
+
+  <option timestep="0.002" density="3000">
+    <flag contact="disable"/>
+  </option>
+
+  <default>
+    <default class="swimmer">
+      <joint type="hinge" pos="0 -.05 0" axis="0 0 1" limited="true" solreflimit=".05 1" solimplimit="0 .8 .1" armature="1e-6"/>
+      <default class="inertial">
+        <geom type="box" size=".001 .05 .01" rgba="0 0 0 0" mass=".01"/>
+      </default>
+      <default class="visual">
+        <geom type="capsule" size=".01" fromto="0 -.05 0 0 .05 0" material="self" mass="0"/>
+      </default>
+      <site size=".01" rgba="0 0 0 0"/>
+    </default>
+    <default class="free">
+      <joint limited="false" stiffness="0" armature="0"/>
+    </default>
+    <motor gear="5e-4" ctrllimited="true" ctrlrange="-1 1"/>
+  </default>
+
+  <worldbody>
+    <geom name="ground" type="plane" size="2 2 0.1" material="grid"/>
+    <body name="head" pos="0 0 .05" childclass="swimmer">
+      <light name="light_1" diffuse=".8 .8 .8" pos="0 0 1.5"/>
+      <geom name="head" type="ellipsoid" size=".02 .04 .017" pos="0 -.022 0"  material="self" mass="0"/>
+      <geom name="nose" type="sphere" pos="0 -.06 0" size=".004" material="effector" mass="0"/>
+      <geom name="eyes" type="capsule" fromto="-.006 -.054 .005 .006 -.054 .005" size=".004" material="eye" mass="0"/>
+      <camera name="tracking1" pos="0 -.2 .5" xyaxes="1 0 0 0 1 1" mode="trackcom" fovy="60"/>
+      <camera name="tracking2" pos="-.9 .5 .15" xyaxes="0 -1 0 .3 0 1" mode="trackcom" fovy="60"/>
+      <camera name="eyes" pos="0 -.058 .005" xyaxes="-1 0 0 0 0 1"/>
+      <joint name="rootx" class="free" type="slide" axis="1 0 0" pos="0 -.05 0"/>
+      <joint name="rooty" class="free" type="slide" axis="0 1 0" pos="0 -.05 0"/>
+      <joint name="rootz" class="free" type="hinge" axis="0 0 1" pos="0 -.05 0"/>
+      <geom name="inertial" class="inertial"/>
+      <geom name="visual" class="visual"/>
+      <site name="head"/>
+    </body>
+    <geom name="target" type="sphere" pos="1 1 .05" size=".1" material="target"/>
+    <light name="target_light" diffuse="1 1 1" pos="1 1 1.5"/>
+  </worldbody>
+
+  <sensor>
+    <framepos name="nose_pos" objtype="geom" objname="nose"/>
+    <framepos name="target_pos" objtype="geom" objname="target"/>
+    <framexaxis name="head_xaxis" objtype="xbody" objname="head"/>
+    <frameyaxis name="head_yaxis" objtype="xbody" objname="head"/>
+    <velocimeter name="head_vel" site="head"/>
+    <gyro name="head_gyro" site="head"/>
+  </sensor>
+
+</mujoco>
--- a/DPI/local_dm_control_suite/tests/domains_test.py
+++ b/DPI/local_dm_control_suite/tests/domains_test.py
@ -0,0 +1,292 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Tests for dm_control.suite domains."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+# Internal dependencies.
+from absl.testing import absltest
+from absl.testing import parameterized
+from dm_control import suite
+from dm_control.rl import control
+import mock
+import numpy as np
+import six
+from six.moves import range
+from six.moves import zip
+
+
+def uniform_random_policy(action_spec, random=None):
+  lower_bounds = action_spec.minimum
+  upper_bounds = action_spec.maximum
+  # Draw values between -1 and 1 for unbounded actions.
+  lower_bounds = np.where(np.isinf(lower_bounds), -1.0, lower_bounds)
+  upper_bounds = np.where(np.isinf(upper_bounds), 1.0, upper_bounds)
+  random_state = np.random.RandomState(random)
+  def policy(time_step):
+    del time_step  # Unused.
+    return random_state.uniform(lower_bounds, upper_bounds)
+  return policy
+
+
+def step_environment(env, policy, num_episodes=5, max_steps_per_episode=10):
+  for _ in range(num_episodes):
+    step_count = 0
+    time_step = env.reset()
+    yield time_step
+    while not time_step.last():
+      action = policy(time_step)
+      time_step = env.step(action)
+      step_count += 1
+      yield time_step
+      if step_count >= max_steps_per_episode:
+        break
+
+
+def make_trajectory(domain, task, seed, **trajectory_kwargs):
+  env = suite.load(domain, task, task_kwargs={'random': seed})
+  policy = uniform_random_policy(env.action_spec(), random=seed)
+  return step_environment(env, policy, **trajectory_kwargs)
+
+
+class DomainTest(parameterized.TestCase):
+  """Tests run on all the tasks registered."""
+
+  def test_constants(self):
+    num_tasks = sum(len(tasks) for tasks in
+                    six.itervalues(suite.TASKS_BY_DOMAIN))
+
+    self.assertLen(suite.ALL_TASKS, num_tasks)
+
+  def _validate_observation(self, observation_dict, observation_spec):
+    obs = observation_dict.copy()
+    for name, spec in six.iteritems(observation_spec):
+      arr = obs.pop(name)
+      self.assertEqual(arr.shape, spec.shape)
+      self.assertEqual(arr.dtype, spec.dtype)
+      self.assertTrue(
+          np.all(np.isfinite(arr)),
+          msg='{!r} has non-finite value(s): {!r}'.format(name, arr))
+    self.assertEmpty(
+        obs,
+        msg='Observation contains arrays(s) that are not in the spec: {!r}'
+        .format(obs))
+
+  def _validate_reward_range(self, time_step):
+    if time_step.first():
+      self.assertIsNone(time_step.reward)
+    else:
+      self.assertIsInstance(time_step.reward, float)
+      self.assertBetween(time_step.reward, 0, 1)
+
+  def _validate_discount(self, time_step):
+    if time_step.first():
+      self.assertIsNone(time_step.discount)
+    else:
+      self.assertIsInstance(time_step.discount, float)
+      self.assertBetween(time_step.discount, 0, 1)
+
+  def _validate_control_range(self, lower_bounds, upper_bounds):
+    for b in lower_bounds:
+      self.assertEqual(b, -1.0)
+    for b in upper_bounds:
+      self.assertEqual(b, 1.0)
+
+  @parameterized.parameters(*suite.ALL_TASKS)
+  def test_components_have_names(self, domain, task):
+    env = suite.load(domain, task)
+    model = env.physics.model
+
+    object_types_and_size_fields = [
+        ('body', 'nbody'),
+        ('joint', 'njnt'),
+        ('geom', 'ngeom'),
+        ('site', 'nsite'),
+        ('camera', 'ncam'),
+        ('light', 'nlight'),
+        ('mesh', 'nmesh'),
+        ('hfield', 'nhfield'),
+        ('texture', 'ntex'),
+        ('material', 'nmat'),
+        ('equality', 'neq'),
+        ('tendon', 'ntendon'),
+        ('actuator', 'nu'),
+        ('sensor', 'nsensor'),
+        ('numeric', 'nnumeric'),
+        ('text', 'ntext'),
+        ('tuple', 'ntuple'),
+    ]
+    for object_type, size_field in object_types_and_size_fields:
+      for idx in range(getattr(model, size_field)):
+        object_name = model.id2name(idx, object_type)
+        self.assertNotEqual(object_name, '',
+                            msg='Model {!r} contains unnamed {!r} with ID {}.'
+                            .format(model.name, object_type, idx))
+
+  @parameterized.parameters(*suite.ALL_TASKS)
+  def test_model_has_at_least_2_cameras(self, domain, task):
+    env = suite.load(domain, task)
+    model = env.physics.model
+    self.assertGreaterEqual(model.ncam, 2,
+                            'Model {!r} should have at least 2 cameras, has {}.'
+                            .format(model.name, model.ncam))
+
+  @parameterized.parameters(*suite.ALL_TASKS)
+  def test_task_conforms_to_spec(self, domain, task):
+    """Tests that the environment timesteps conform to specifications."""
+    is_benchmark = (domain, task) in suite.BENCHMARKING
+    env = suite.load(domain, task)
+    observation_spec = env.observation_spec()
+    action_spec = env.action_spec()
+
+    # Check action bounds.
+    if is_benchmark:
+      self._validate_control_range(action_spec.minimum, action_spec.maximum)
+
+    # Step through the environment, applying random actions sampled within the
+    # valid range and check the observations, rewards, and discounts.
+    policy = uniform_random_policy(action_spec)
+    for time_step in step_environment(env, policy):
+      self._validate_observation(time_step.observation, observation_spec)
+      self._validate_discount(time_step)
+      if is_benchmark:
+        self._validate_reward_range(time_step)
+
+  @parameterized.parameters(*suite.ALL_TASKS)
+  def test_environment_is_deterministic(self, domain, task):
+    """Tests that identical seeds and actions produce identical trajectories."""
+    seed = 0
+    # Iterate over two trajectories generated using identical sequences of
+    # random actions, and with identical task random states. Check that the
+    # observations, rewards, discounts and step types are identical.
+    trajectory1 = make_trajectory(domain=domain, task=task, seed=seed)
+    trajectory2 = make_trajectory(domain=domain, task=task, seed=seed)
+    for time_step1, time_step2 in zip(trajectory1, trajectory2):
+      self.assertEqual(time_step1.step_type, time_step2.step_type)
+      self.assertEqual(time_step1.reward, time_step2.reward)
+      self.assertEqual(time_step1.discount, time_step2.discount)
+      for key in six.iterkeys(time_step1.observation):
+        np.testing.assert_array_equal(
+            time_step1.observation[key], time_step2.observation[key],
+            err_msg='Observation {!r} is not equal.'.format(key))
+
+  def assertCorrectColors(self, physics, reward):
+    colors = physics.named.model.mat_rgba
+    for material_name in ('self', 'effector', 'target'):
+      highlight = colors[material_name + '_highlight']
+      default = colors[material_name + '_default']
+      blend_coef = reward ** 4
+      expected = blend_coef * highlight + (1.0 - blend_coef) * default
+      actual = colors[material_name]
+      err_msg = ('Material {!r} has unexpected color.\nExpected: {!r}\n'
+                 'Actual: {!r}'.format(material_name, expected, actual))
+      np.testing.assert_array_almost_equal(expected, actual, err_msg=err_msg)
+
+  @parameterized.parameters(*suite.ALL_TASKS)
+  def test_visualize_reward(self, domain, task):
+    env = suite.load(domain, task)
+    env.task.visualize_reward = True
+    action = np.zeros(env.action_spec().shape)
+
+    with mock.patch.object(env.task, 'get_reward') as mock_get_reward:
+      mock_get_reward.return_value = -3.0  # Rewards < 0 should be clipped.
+      env.reset()
+      mock_get_reward.assert_called_with(env.physics)
+      self.assertCorrectColors(env.physics, reward=0.0)
+
+      mock_get_reward.reset_mock()
+      mock_get_reward.return_value = 0.5
+      env.step(action)
+      mock_get_reward.assert_called_with(env.physics)
+      self.assertCorrectColors(env.physics, reward=mock_get_reward.return_value)
+
+      mock_get_reward.reset_mock()
+      mock_get_reward.return_value = 2.0  # Rewards > 1 should be clipped.
+      env.step(action)
+      mock_get_reward.assert_called_with(env.physics)
+      self.assertCorrectColors(env.physics, reward=1.0)
+
+      mock_get_reward.reset_mock()
+      mock_get_reward.return_value = 0.25
+      env.reset()
+      mock_get_reward.assert_called_with(env.physics)
+      self.assertCorrectColors(env.physics, reward=mock_get_reward.return_value)
+
+  @parameterized.parameters(*suite.ALL_TASKS)
+  def test_task_supports_environment_kwargs(self, domain, task):
+    env = suite.load(domain, task,
+                     environment_kwargs=dict(flat_observation=True))
+    # Check that the kwargs are actually passed through to the environment.
+    self.assertSetEqual(set(env.observation_spec()),
+                        {control.FLAT_OBSERVATION_KEY})
+
+  @parameterized.parameters(*suite.ALL_TASKS)
+  def test_observation_arrays_dont_share_memory(self, domain, task):
+    env = suite.load(domain, task)
+    first_timestep = env.reset()
+    action = np.zeros(env.action_spec().shape)
+    second_timestep = env.step(action)
+    for name, first_array in six.iteritems(first_timestep.observation):
+      second_array = second_timestep.observation[name]
+      self.assertFalse(
+          np.may_share_memory(first_array, second_array),
+          msg='Consecutive observations of {!r} may share memory.'.format(name))
+
+  @parameterized.parameters(*suite.ALL_TASKS)
+  def test_observations_dont_contain_constant_elements(self, domain, task):
+    env = suite.load(domain, task)
+    trajectory = make_trajectory(domain=domain, task=task, seed=0,
+                                 num_episodes=2, max_steps_per_episode=1000)
+    observations = {name: [] for name in env.observation_spec()}
+    for time_step in trajectory:
+      for name, array in six.iteritems(time_step.observation):
+        observations[name].append(array)
+
+    failures = []
+
+    for name, array_list in six.iteritems(observations):
+      # Sampling random uniform actions generally isn't sufficient to trigger
+      # these touch sensors.
+      if (domain in ('manipulator', 'stacker') and name == 'touch' or
+          domain == 'quadruped' and name == 'force_torque'):
+        continue
+      stacked_arrays = np.array(array_list)
+      is_constant = np.all(stacked_arrays == stacked_arrays[0], axis=0)
+      has_constant_elements = (
+          is_constant if np.isscalar(is_constant) else np.any(is_constant))
+      if has_constant_elements:
+        failures.append((name, is_constant))
+
+    self.assertEmpty(
+        failures,
+        msg='The following observation(s) contain constant elements:\n{}'
+        .format('\n'.join(':\t'.join([name, str(is_constant)])
+                          for (name, is_constant) in failures)))
+
+  @parameterized.parameters(*suite.ALL_TASKS)
+  def test_initial_state_is_randomized(self, domain, task):
+    env = suite.load(domain, task, task_kwargs={'random': 42})
+    obs1 = env.reset().observation
+    obs2 = env.reset().observation
+    self.assertFalse(
+        all(np.all(obs1[k] == obs2[k]) for k in obs1),
+        'Two consecutive initial states have identical observations.\n'
+        'First: {}\nSecond: {}'.format(obs1, obs2))
+
+if __name__ == '__main__':
+  absltest.main()
--- a/DPI/local_dm_control_suite/tests/loader_test.py
+++ b/DPI/local_dm_control_suite/tests/loader_test.py
@ -0,0 +1,52 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Tests for the dm_control.suite loader."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+# Internal dependencies.
+
+from absl.testing import absltest
+
+from dm_control import suite
+from dm_control.rl import control
+
+
+class LoaderTest(absltest.TestCase):
+
+  def test_load_without_kwargs(self):
+    env = suite.load('cartpole', 'swingup')
+    self.assertIsInstance(env, control.Environment)
+
+  def test_load_with_kwargs(self):
+    env = suite.load('cartpole', 'swingup',
+                     task_kwargs={'time_limit': 40, 'random': 99})
+    self.assertIsInstance(env, control.Environment)
+
+
+class LoaderConstantsTest(absltest.TestCase):
+
+  def testSuiteConstants(self):
+    self.assertNotEmpty(suite.BENCHMARKING)
+    self.assertNotEmpty(suite.EASY)
+    self.assertNotEmpty(suite.HARD)
+    self.assertNotEmpty(suite.EXTRA)
+
+
+if __name__ == '__main__':
+  absltest.main()
--- a/DPI/local_dm_control_suite/tests/lqr_test.py
+++ b/DPI/local_dm_control_suite/tests/lqr_test.py
@ -0,0 +1,88 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Tests specific to the LQR domain."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import  math
+import unittest
+
+# Internal dependencies.
+from absl import logging
+
+from absl.testing import absltest
+from absl.testing import parameterized
+
+from local_dm_control_suite import lqr
+from local_dm_control_suite import lqr_solver
+
+import numpy as np
+from six.moves import range
+
+
+class LqrTest(parameterized.TestCase):
+
+  @parameterized.named_parameters(
+      ('lqr_2_1', lqr.lqr_2_1),
+      ('lqr_6_2', lqr.lqr_6_2))
+  def test_lqr_optimal_policy(self, make_env):
+    env = make_env()
+    p, k, beta = lqr_solver.solve(env)
+    self.assertPolicyisOptimal(env, p, k, beta)
+
+  @parameterized.named_parameters(
+      ('lqr_2_1', lqr.lqr_2_1),
+      ('lqr_6_2', lqr.lqr_6_2))
+  @unittest.skipUnless(
+      condition=lqr_solver.sp,
+      reason='scipy is not available, so non-scipy DARE solver is the default.')
+  def test_lqr_optimal_policy_no_scipy(self, make_env):
+    env = make_env()
+    old_sp = lqr_solver.sp
+    try:
+      lqr_solver.sp = None  # Force the solver to use the non-scipy code path.
+      p, k, beta = lqr_solver.solve(env)
+    finally:
+      lqr_solver.sp = old_sp
+    self.assertPolicyisOptimal(env, p, k, beta)
+
+  def assertPolicyisOptimal(self, env, p, k, beta):
+    tolerance = 1e-3
+    n_steps = int(math.ceil(math.log10(tolerance) / math.log10(beta)))
+    logging.info('%d timesteps for %g convergence.', n_steps, tolerance)
+    total_loss = 0.0
+
+    timestep = env.reset()
+    initial_state = np.hstack((timestep.observation['position'],
+                               timestep.observation['velocity']))
+    logging.info('Measuring total cost over %d steps.', n_steps)
+    for _ in range(n_steps):
+      x = np.hstack((timestep.observation['position'],
+                     timestep.observation['velocity']))
+      # u = k*x is the optimal policy
+      u = k.dot(x)
+      total_loss += 1 - (timestep.reward or 0.0)
+      timestep = env.step(u)
+
+    logging.info('Analytical expected total cost is .5*x^T*p*x.')
+    expected_loss = .5 * initial_state.T.dot(p).dot(initial_state)
+    logging.info('Comparing measured and predicted costs.')
+    np.testing.assert_allclose(expected_loss, total_loss, rtol=tolerance)
+
+if __name__ == '__main__':
+  absltest.main()
--- a/DPI/local_dm_control_suite/utils/init.py
+++ b/DPI/local_dm_control_suite/utils/init.py
@ -0,0 +1,16 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Utility functions used in the control suite."""
--- a/DPI/local_dm_control_suite/utils/parse_amc.py
+++ b/DPI/local_dm_control_suite/utils/parse_amc.py
@ -0,0 +1,251 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Parse and convert amc motion capture data."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+from dm_control.mujoco.wrapper import mjbindings
+import numpy as np
+from scipy import interpolate
+from six.moves import range
+
+mjlib = mjbindings.mjlib
+
+MOCAP_DT = 1.0/120.0
+CONVERSION_LENGTH = 0.056444
+
+_CMU_MOCAP_JOINT_ORDER = (
+    'root0', 'root1', 'root2', 'root3', 'root4', 'root5', 'lowerbackrx',
+    'lowerbackry', 'lowerbackrz', 'upperbackrx', 'upperbackry', 'upperbackrz',
+    'thoraxrx', 'thoraxry', 'thoraxrz', 'lowerneckrx', 'lowerneckry',
+    'lowerneckrz', 'upperneckrx', 'upperneckry', 'upperneckrz', 'headrx',
+    'headry', 'headrz', 'rclaviclery', 'rclaviclerz', 'rhumerusrx',
+    'rhumerusry', 'rhumerusrz', 'rradiusrx', 'rwristry', 'rhandrx', 'rhandrz',
+    'rfingersrx', 'rthumbrx', 'rthumbrz', 'lclaviclery', 'lclaviclerz',
+    'lhumerusrx', 'lhumerusry', 'lhumerusrz', 'lradiusrx', 'lwristry',
+    'lhandrx', 'lhandrz', 'lfingersrx', 'lthumbrx', 'lthumbrz', 'rfemurrx',
+    'rfemurry', 'rfemurrz', 'rtibiarx', 'rfootrx', 'rfootrz', 'rtoesrx',
+    'lfemurrx', 'lfemurry', 'lfemurrz', 'ltibiarx', 'lfootrx', 'lfootrz',
+    'ltoesrx'
+)
+
+Converted = collections.namedtuple('Converted',
+                                   ['qpos', 'qvel', 'time'])
+
+
+def convert(file_name, physics, timestep):
+  """Converts the parsed .amc values into qpos and qvel values and resamples.
+
+  Args:
+    file_name: The .amc file to be parsed and converted.
+    physics: The corresponding physics instance.
+    timestep: Desired output interval between resampled frames.
+
+  Returns:
+    A namedtuple with fields:
+        `qpos`, a numpy array containing converted positional variables.
+        `qvel`, a numpy array containing converted velocity variables.
+        `time`, a numpy array containing the corresponding times.
+  """
+  frame_values = parse(file_name)
+  joint2index = {}
+  for name in physics.named.data.qpos.axes.row.names:
+    joint2index[name] = physics.named.data.qpos.axes.row.convert_key_item(name)
+  index2joint = {}
+  for joint, index in joint2index.items():
+    if isinstance(index, slice):
+      indices = range(index.start, index.stop)
+    else:
+      indices = [index]
+    for ii in indices:
+      index2joint[ii] = joint
+
+  # Convert frame_values to qpos
+  amcvals2qpos_transformer = Amcvals2qpos(index2joint, _CMU_MOCAP_JOINT_ORDER)
+  qpos_values = []
+  for frame_value in frame_values:
+    qpos_values.append(amcvals2qpos_transformer(frame_value))
+  qpos_values = np.stack(qpos_values)  # Time by nq
+
+  # Interpolate/resample.
+  # Note: interpolate quaternions rather than euler angles (slerp).
+  # see https://en.wikipedia.org/wiki/Slerp
+  qpos_values_resampled = []
+  time_vals = np.arange(0, len(frame_values)*MOCAP_DT - 1e-8, MOCAP_DT)
+  time_vals_new = np.arange(0, len(frame_values)*MOCAP_DT, timestep)
+  while time_vals_new[-1] > time_vals[-1]:
+    time_vals_new = time_vals_new[:-1]
+
+  for i in range(qpos_values.shape[1]):
+    f = interpolate.splrep(time_vals, qpos_values[:, i])
+    qpos_values_resampled.append(interpolate.splev(time_vals_new, f))
+
+  qpos_values_resampled = np.stack(qpos_values_resampled)  # nq by ntime
+
+  qvel_list = []
+  for t in range(qpos_values_resampled.shape[1]-1):
+    p_tp1 = qpos_values_resampled[:, t + 1]
+    p_t = qpos_values_resampled[:, t]
+    qvel = [(p_tp1[:3]-p_t[:3])/ timestep,
+            mj_quat2vel(mj_quatdiff(p_t[3:7], p_tp1[3:7]), timestep),
+            (p_tp1[7:]-p_t[7:])/ timestep]
+    qvel_list.append(np.concatenate(qvel))
+
+  qvel_values_resampled = np.vstack(qvel_list).T
+
+  return Converted(qpos_values_resampled, qvel_values_resampled, time_vals_new)
+
+
+def parse(file_name):
+  """Parses the amc file format."""
+  values = []
+  fid = open(file_name, 'r')
+  line = fid.readline().strip()
+  frame_ind = 1
+  first_frame = True
+  while True:
+    # Parse first frame.
+    if first_frame and line[0] == str(frame_ind):
+      first_frame = False
+      frame_ind += 1
+      frame_vals = []
+      while True:
+        line = fid.readline().strip()
+        if not line or line == str(frame_ind):
+          values.append(np.array(frame_vals, dtype=np.float))
+          break
+        tokens = line.split()
+        frame_vals.extend(tokens[1:])
+    # Parse other frames.
+    elif line == str(frame_ind):
+      frame_ind += 1
+      frame_vals = []
+      while True:
+        line = fid.readline().strip()
+        if not line or line == str(frame_ind):
+          values.append(np.array(frame_vals, dtype=np.float))
+          break
+        tokens = line.split()
+        frame_vals.extend(tokens[1:])
+    else:
+      line = fid.readline().strip()
+      if not line:
+        break
+  return values
+
+
+class Amcvals2qpos(object):
+  """Callable that converts .amc values for a frame and to MuJoCo qpos format.
+  """
+
+  def __init__(self, index2joint, joint_order):
+    """Initializes a new Amcvals2qpos instance.
+
+    Args:
+      index2joint: List of joint angles in .amc file.
+      joint_order: List of joint names in MuJoco MJCF.
+    """
+    # Root is x,y,z, then quat.
+    # need to get indices of qpos that order for amc default order
+    self.qpos_root_xyz_ind = [0, 1, 2]
+    self.root_xyz_ransform = np.array(
+        [[1, 0, 0], [0, 0, -1], [0, 1, 0]]) * CONVERSION_LENGTH
+    self.qpos_root_quat_ind = [3, 4, 5, 6]
+    amc2qpos_transform = np.zeros((len(index2joint), len(joint_order)))
+    for i in range(len(index2joint)):
+      for j in range(len(joint_order)):
+        if index2joint[i] == joint_order[j]:
+          if 'rx' in index2joint[i]:
+            amc2qpos_transform[i][j] = 1
+          elif 'ry' in index2joint[i]:
+            amc2qpos_transform[i][j] = 1
+          elif 'rz' in index2joint[i]:
+            amc2qpos_transform[i][j] = 1
+    self.amc2qpos_transform = amc2qpos_transform
+
+  def __call__(self, amc_val):
+    """Converts a `.amc` frame to MuJoCo qpos format."""
+    amc_val_rad = np.deg2rad(amc_val)
+    qpos = np.dot(self.amc2qpos_transform, amc_val_rad)
+
+    # Root.
+    qpos[:3] = np.dot(self.root_xyz_ransform, amc_val[:3])
+    qpos_quat = euler2quat(amc_val[3], amc_val[4], amc_val[5])
+    qpos_quat = mj_quatprod(euler2quat(90, 0, 0), qpos_quat)
+
+    for i, ind in enumerate(self.qpos_root_quat_ind):
+      qpos[ind] = qpos_quat[i]
+
+    return qpos
+
+
+def euler2quat(ax, ay, az):
+  """Converts euler angles to a quaternion.
+
+  Note: rotation order is zyx
+
+  Args:
+    ax: Roll angle (deg)
+    ay: Pitch angle (deg).
+    az: Yaw angle (deg).
+
+  Returns:
+    A numpy array representing the rotation as a quaternion.
+  """
+  r1 = az
+  r2 = ay
+  r3 = ax
+
+  c1 = np.cos(np.deg2rad(r1 / 2))
+  s1 = np.sin(np.deg2rad(r1 / 2))
+  c2 = np.cos(np.deg2rad(r2 / 2))
+  s2 = np.sin(np.deg2rad(r2 / 2))
+  c3 = np.cos(np.deg2rad(r3 / 2))
+  s3 = np.sin(np.deg2rad(r3 / 2))
+
+  q0 = c1 * c2 * c3 + s1 * s2 * s3
+  q1 = c1 * c2 * s3 - s1 * s2 * c3
+  q2 = c1 * s2 * c3 + s1 * c2 * s3
+  q3 = s1 * c2 * c3 - c1 * s2 * s3
+
+  return np.array([q0, q1, q2, q3])
+
+
+def mj_quatprod(q, r):
+  quaternion = np.zeros(4)
+  mjlib.mju_mulQuat(quaternion, np.ascontiguousarray(q),
+                    np.ascontiguousarray(r))
+  return quaternion
+
+
+def mj_quat2vel(q, dt):
+  vel = np.zeros(3)
+  mjlib.mju_quat2Vel(vel, np.ascontiguousarray(q), dt)
+  return vel
+
+
+def mj_quatneg(q):
+  quaternion = np.zeros(4)
+  mjlib.mju_negQuat(quaternion, np.ascontiguousarray(q))
+  return quaternion
+
+
+def mj_quatdiff(source, target):
+  return mj_quatprod(mj_quatneg(source), np.ascontiguousarray(target))
--- a/DPI/local_dm_control_suite/utils/parse_amc_test.py
+++ b/DPI/local_dm_control_suite/utils/parse_amc_test.py
@ -0,0 +1,68 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Tests for parse_amc utility."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+# Internal dependencies.
+
+from absl.testing import absltest
+from local_dm_control_suite import humanoid_CMU
+from dm_control.suite.utils import parse_amc
+
+from dm_control.utils import io as resources
+
+_TEST_AMC_PATH = resources.GetResourceFilename(
+    os.path.join(os.path.dirname(__file__), '../demos/zeros.amc'))
+
+
+class ParseAMCTest(absltest.TestCase):
+
+  def test_sizes_of_parsed_data(self):
+
+    # Instantiate the humanoid environment.
+    env = humanoid_CMU.stand()
+
+    # Parse and convert specified clip.
+    converted = parse_amc.convert(
+        _TEST_AMC_PATH, env.physics, env.control_timestep())
+
+    self.assertEqual(converted.qpos.shape[0], 63)
+    self.assertEqual(converted.qvel.shape[0], 62)
+    self.assertEqual(converted.time.shape[0], converted.qpos.shape[1])
+    self.assertEqual(converted.qpos.shape[1],
+                     converted.qvel.shape[1] + 1)
+
+    # Parse and convert specified clip -- WITH SMALLER TIMESTEP
+    converted2 = parse_amc.convert(
+        _TEST_AMC_PATH, env.physics, 0.5 * env.control_timestep())
+
+    self.assertEqual(converted2.qpos.shape[0], 63)
+    self.assertEqual(converted2.qvel.shape[0], 62)
+    self.assertEqual(converted2.time.shape[0], converted2.qpos.shape[1])
+    self.assertEqual(converted.qpos.shape[1],
+                     converted.qvel.shape[1] + 1)
+
+    # Compare sizes of parsed objects for different timesteps
+    self.assertEqual(converted.qpos.shape[1] * 2, converted2.qpos.shape[1])
+
+
+if __name__ == '__main__':
+  absltest.main()
--- a/DPI/local_dm_control_suite/utils/randomizers.py
+++ b/DPI/local_dm_control_suite/utils/randomizers.py
@ -0,0 +1,91 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Randomization functions."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from dm_control.mujoco.wrapper import mjbindings
+import numpy as np
+from six.moves import range
+
+
+def random_limited_quaternion(random, limit):
+  """Generates a random quaternion limited to the specified rotations."""
+  axis = random.randn(3)
+  axis /= np.linalg.norm(axis)
+  angle = random.rand() * limit
+
+  quaternion = np.zeros(4)
+  mjbindings.mjlib.mju_axisAngle2Quat(quaternion, axis, angle)
+
+  return quaternion
+
+
+def randomize_limited_and_rotational_joints(physics, random=None):
+  """Randomizes the positions of joints defined in the physics body.
+
+  The following randomization rules apply:
+    - Bounded joints (hinges or sliders) are sampled uniformly in the bounds.
+    - Unbounded hinges are samples uniformly in [-pi, pi]
+    - Quaternions for unlimited free joints and ball joints are sampled
+      uniformly on the unit 3-sphere.
+    - Quaternions for limited ball joints are sampled uniformly on a sector
+      of the unit 3-sphere.
+    - The linear degrees of freedom of free joints are not randomized.
+
+  Args:
+    physics: Instance of 'Physics' class that holds a loaded model.
+    random: Optional instance of 'np.random.RandomState'. Defaults to the global
+      NumPy random state.
+  """
+  random = random or np.random
+
+  hinge = mjbindings.enums.mjtJoint.mjJNT_HINGE
+  slide = mjbindings.enums.mjtJoint.mjJNT_SLIDE
+  ball = mjbindings.enums.mjtJoint.mjJNT_BALL
+  free = mjbindings.enums.mjtJoint.mjJNT_FREE
+
+  qpos = physics.named.data.qpos
+
+  for joint_id in range(physics.model.njnt):
+    joint_name = physics.model.id2name(joint_id, 'joint')
+    joint_type = physics.model.jnt_type[joint_id]
+    is_limited = physics.model.jnt_limited[joint_id]
+    range_min, range_max = physics.model.jnt_range[joint_id]
+
+    if is_limited:
+      if joint_type == hinge or joint_type == slide:
+        qpos[joint_name] = random.uniform(range_min, range_max)
+
+      elif joint_type == ball:
+        qpos[joint_name] = random_limited_quaternion(random, range_max)
+
+    else:
+      if joint_type == hinge:
+        qpos[joint_name] = random.uniform(-np.pi, np.pi)
+
+      elif joint_type == ball:
+        quat = random.randn(4)
+        quat /= np.linalg.norm(quat)
+        qpos[joint_name] = quat
+
+      elif joint_type == free:
+        quat = random.rand(4)
+        quat /= np.linalg.norm(quat)
+        qpos[joint_name][3:] = quat
+
--- a/DPI/local_dm_control_suite/utils/randomizers_test.py
+++ b/DPI/local_dm_control_suite/utils/randomizers_test.py
@ -0,0 +1,164 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Tests for randomizers.py."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+# Internal dependencies.
+from absl.testing import absltest
+from absl.testing import parameterized
+from dm_control import mujoco
+from dm_control.mujoco.wrapper import mjbindings
+from dm_control.suite.utils import randomizers
+import numpy as np
+from six.moves import range
+
+mjlib = mjbindings.mjlib
+
+
+class RandomizeUnlimitedJointsTest(parameterized.TestCase):
+
+  def setUp(self):
+    self.rand = np.random.RandomState(100)
+
+  def test_single_joint_of_each_type(self):
+    physics = mujoco.Physics.from_xml_string("""<mujoco>
+          <default>
+            <joint range="0 90" />
+          </default>
+          <worldbody>
+            <body>
+              <geom type="box" size="1 1 1"/>
+              <joint name="free" type="free"/>
+            </body>
+            <body>
+              <geom type="box" size="1 1 1"/>
+              <joint name="limited_hinge" type="hinge" limited="true"/>
+              <joint name="slide" type="slide"/>
+              <joint name="limited_slide" type="slide" limited="true"/>
+              <joint name="hinge" type="hinge"/>
+            </body>
+            <body>
+              <geom type="box" size="1 1 1"/>
+              <joint name="ball" type="ball"/>
+            </body>
+            <body>
+              <geom type="box" size="1 1 1"/>
+              <joint name="limited_ball" type="ball" limited="true"/>
+            </body>
+          </worldbody>
+        </mujoco>""")
+
+    randomizers.randomize_limited_and_rotational_joints(physics, self.rand)
+    self.assertNotEqual(0., physics.named.data.qpos['hinge'])
+    self.assertNotEqual(0., physics.named.data.qpos['limited_hinge'])
+    self.assertNotEqual(0., physics.named.data.qpos['limited_slide'])
+
+    self.assertNotEqual(0., np.sum(physics.named.data.qpos['ball']))
+    self.assertNotEqual(0., np.sum(physics.named.data.qpos['limited_ball']))
+
+    self.assertNotEqual(0., np.sum(physics.named.data.qpos['free'][3:]))
+
+    # Unlimited slide and the positional part of the free joint remains
+    # uninitialized.
+    self.assertEqual(0., physics.named.data.qpos['slide'])
+    self.assertEqual(0., np.sum(physics.named.data.qpos['free'][:3]))
+
+  def test_multiple_joints_of_same_type(self):
+    physics = mujoco.Physics.from_xml_string("""<mujoco>
+          <worldbody>
+            <body>
+              <geom type="box" size="1 1 1"/>
+              <joint name="hinge_1" type="hinge"/>
+              <joint name="hinge_2" type="hinge"/>
+              <joint name="hinge_3" type="hinge"/>
+            </body>
+          </worldbody>
+        </mujoco>""")
+
+    randomizers.randomize_limited_and_rotational_joints(physics, self.rand)
+    self.assertNotEqual(0., physics.named.data.qpos['hinge_1'])
+    self.assertNotEqual(0., physics.named.data.qpos['hinge_2'])
+    self.assertNotEqual(0., physics.named.data.qpos['hinge_3'])
+
+    self.assertNotEqual(physics.named.data.qpos['hinge_1'],
+                        physics.named.data.qpos['hinge_2'])
+
+    self.assertNotEqual(physics.named.data.qpos['hinge_2'],
+                        physics.named.data.qpos['hinge_3'])
+
+    self.assertNotEqual(physics.named.data.qpos['hinge_1'],
+                        physics.named.data.qpos['hinge_3'])
+
+  def test_unlimited_hinge_randomization_range(self):
+    physics = mujoco.Physics.from_xml_string("""<mujoco>
+          <worldbody>
+            <body>
+              <geom type="box" size="1 1 1"/>
+              <joint name="hinge" type="hinge"/>
+            </body>
+          </worldbody>
+        </mujoco>""")
+
+    for _ in range(10):
+      randomizers.randomize_limited_and_rotational_joints(physics, self.rand)
+      self.assertBetween(physics.named.data.qpos['hinge'], -np.pi, np.pi)
+
+  def test_limited_1d_joint_limits_are_respected(self):
+    physics = mujoco.Physics.from_xml_string("""<mujoco>
+          <default>
+            <joint limited="true"/>
+          </default>
+          <worldbody>
+            <body>
+              <geom type="box" size="1 1 1"/>
+              <joint name="hinge" type="hinge" range="0 10"/>
+              <joint name="slide" type="slide" range="30 50"/>
+            </body>
+          </worldbody>
+        </mujoco>""")
+
+    for _ in range(10):
+      randomizers.randomize_limited_and_rotational_joints(physics, self.rand)
+      self.assertBetween(physics.named.data.qpos['hinge'],
+                         np.deg2rad(0), np.deg2rad(10))
+      self.assertBetween(physics.named.data.qpos['slide'], 30, 50)
+
+  def test_limited_ball_joint_are_respected(self):
+    physics = mujoco.Physics.from_xml_string("""<mujoco>
+          <worldbody>
+            <body name="body" zaxis="1 0 0">
+              <geom type="box" size="1 1 1"/>
+              <joint name="ball" type="ball" limited="true" range="0 60"/>
+            </body>
+          </worldbody>
+        </mujoco>""")
+
+    body_axis = np.array([1., 0., 0.])
+    joint_axis = np.zeros(3)
+    for _ in range(10):
+      randomizers.randomize_limited_and_rotational_joints(physics, self.rand)
+
+      quat = physics.named.data.qpos['ball']
+      mjlib.mju_rotVecQuat(joint_axis, body_axis, quat)
+      angle_cos = np.dot(body_axis, joint_axis)
+      self.assertGreater(angle_cos, 0.5)  # cos(60) = 0.5
+
+
+if __name__ == '__main__':
+  absltest.main()
--- a/DPI/local_dm_control_suite/walker.py
+++ b/DPI/local_dm_control_suite/walker.py
@ -0,0 +1,158 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Planar Walker Domain."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+from dm_control import mujoco
+from dm_control.rl import control
+from local_dm_control_suite import base
+from local_dm_control_suite import common
+from dm_control.suite.utils import randomizers
+from dm_control.utils import containers
+from dm_control.utils import rewards
+
+
+_DEFAULT_TIME_LIMIT = 25
+_CONTROL_TIMESTEP = .025
+
+# Minimal height of torso over foot above which stand reward is 1.
+_STAND_HEIGHT = 1.2
+
+# Horizontal speeds (meters/second) above which move reward is 1.
+_WALK_SPEED = 1
+_RUN_SPEED = 8
+
+
+SUITE = containers.TaggedTasks()
+
+
+def get_model_and_assets():
+  """Returns a tuple containing the model XML string and a dict of assets."""
+  return common.read_model('walker.xml'), common.ASSETS
+
+
+@SUITE.add('benchmarking')
+def stand(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Returns the Stand task."""
+  physics = Physics.from_xml_string(*get_model_and_assets())
+  task = PlanarWalker(move_speed=0, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
+      **environment_kwargs)
+
+
+@SUITE.add('benchmarking')
+def walk(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Returns the Walk task."""
+  physics = Physics.from_xml_string(*get_model_and_assets())
+  task = PlanarWalker(move_speed=_WALK_SPEED, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
+      **environment_kwargs)
+
+
+@SUITE.add('benchmarking')
+def run(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Returns the Run task."""
+  physics = Physics.from_xml_string(*get_model_and_assets())
+  task = PlanarWalker(move_speed=_RUN_SPEED, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
+      **environment_kwargs)
+
+
+class Physics(mujoco.Physics):
+  """Physics simulation with additional features for the Walker domain."""
+
+  def torso_upright(self):
+    """Returns projection from z-axes of torso to the z-axes of world."""
+    return self.named.data.xmat['torso', 'zz']
+
+  def torso_height(self):
+    """Returns the height of the torso."""
+    return self.named.data.xpos['torso', 'z']
+
+  def horizontal_velocity(self):
+    """Returns the horizontal velocity of the center-of-mass."""
+    return self.named.data.sensordata['torso_subtreelinvel'][0]
+
+  def orientations(self):
+    """Returns planar orientations of all bodies."""
+    return self.named.data.xmat[1:, ['xx', 'xz']].ravel()
+
+
+class PlanarWalker(base.Task):
+  """A planar walker task."""
+
+  def __init__(self, move_speed, random=None):
+    """Initializes an instance of `PlanarWalker`.
+
+    Args:
+      move_speed: A float. If this value is zero, reward is given simply for
+        standing up. Otherwise this specifies a target horizontal velocity for
+        the walking task.
+      random: Optional, either a `numpy.random.RandomState` instance, an
+        integer seed for creating a new `RandomState`, or None to select a seed
+        automatically (default).
+    """
+    self._move_speed = move_speed
+    super(PlanarWalker, self).__init__(random=random)
+
+  def initialize_episode(self, physics):
+    """Sets the state of the environment at the start of each episode.
+
+    In 'standing' mode, use initial orientation and small velocities.
+    In 'random' mode, randomize joint angles and let fall to the floor.
+
+    Args:
+      physics: An instance of `Physics`.
+
+    """
+    randomizers.randomize_limited_and_rotational_joints(physics, self.random)
+    super(PlanarWalker, self).initialize_episode(physics)
+
+  def get_observation(self, physics):
+    """Returns an observation of body orientations, height and velocites."""
+    obs = collections.OrderedDict()
+    obs['orientations'] = physics.orientations()
+    obs['height'] = physics.torso_height()
+    obs['velocity'] = physics.velocity()
+    return obs
+
+  def get_reward(self, physics):
+    """Returns a reward to the agent."""
+    standing = rewards.tolerance(physics.torso_height(),
+                                 bounds=(_STAND_HEIGHT, float('inf')),
+                                 margin=_STAND_HEIGHT/2)
+    upright = (1 + physics.torso_upright()) / 2
+    stand_reward = (3*standing + upright) / 4
+    if self._move_speed == 0:
+      return stand_reward
+    else:
+      move_reward = rewards.tolerance(physics.horizontal_velocity(),
+                                      bounds=(self._move_speed, float('inf')),
+                                      margin=self._move_speed/2,
+                                      value_at_margin=0.5,
+                                      sigmoid='linear')
+      return stand_reward * (5*move_reward + 1) / 6
--- a/DPI/local_dm_control_suite/walker.xml
+++ b/DPI/local_dm_control_suite/walker.xml
@ -0,0 +1,70 @@
+<mujoco model="planar walker">
+  <include file="./common/visual.xml"/>
+  <include file="./common/skybox.xml"/>
+  <include file="./common/materials_white_floor.xml"/>
+
+  <option timestep="0.0025"/>
+
+  <statistic extent="2" center="0 0 1"/>
+
+  <default>
+    <joint damping=".1" armature="0.01" limited="true" solimplimit="0 .99 .01"/>
+    <geom contype="1" conaffinity="0" friction=".7 .1 .1"/>
+    <motor ctrlrange="-1 1" ctrllimited="true"/>
+    <site size="0.01"/>
+    <default class="walker">
+      <geom material="self" type="capsule"/>
+      <joint axis="0 -1 0"/>
+    </default>
+  </default>
+
+  <worldbody>
+    <geom name="floor" type="plane" conaffinity="1" pos="248 0 0" size="250 .8 .2" material="grid" zaxis="0 0 1"/>
+    <body name="torso" pos="0 0 1.3" childclass="walker">
+      <light name="light" pos="0 0 2" mode="trackcom"/>
+      <camera name="side" pos="0 -2 .7" euler="60 0 0" mode="trackcom"/>
+      <camera name="back" pos="-2 0 .5" xyaxes="0 -1 0 1 0 3" mode="trackcom"/>
+      <joint name="rootz" axis="0 0 1" type="slide" limited="false" armature="0" damping="0"/>
+      <joint name="rootx" axis="1 0 0" type="slide" limited="false" armature="0" damping="0"/>
+      <joint name="rooty" axis="0 1 0" type="hinge" limited="false" armature="0" damping="0"/>
+      <geom name="torso" size="0.07 0.3"/>
+      <body name="right_thigh" pos="0 -.05 -0.3">
+        <joint name="right_hip" range="-20 100"/>
+        <geom name="right_thigh" pos="0 0 -0.225" size="0.05 0.225"/>
+        <body name="right_leg" pos="0 0 -0.7">
+          <joint name="right_knee" pos="0 0 0.25" range="-150 0"/>
+          <geom name="right_leg" size="0.04 0.25"/>
+          <body name="right_foot" pos="0.06 0 -0.25">
+            <joint name="right_ankle" pos="-0.06 0 0" range="-45 45"/>
+            <geom name="right_foot" zaxis="1 0 0" size="0.05 0.1"/>
+          </body>
+        </body>
+      </body>
+      <body name="left_thigh" pos="0 .05 -0.3" >
+        <joint name="left_hip" range="-20 100"/>
+        <geom name="left_thigh" pos="0 0 -0.225" size="0.05 0.225"/>
+        <body name="left_leg" pos="0 0 -0.7">
+          <joint name="left_knee" pos="0 0 0.25" range="-150 0"/>
+          <geom name="left_leg" size="0.04 0.25"/>
+          <body name="left_foot" pos="0.06 0 -0.25">
+            <joint name="left_ankle" pos="-0.06 0 0" range="-45 45"/>
+            <geom name="left_foot" zaxis="1 0 0" size="0.05 0.1"/>
+          </body>
+        </body>
+      </body>
+    </body>
+  </worldbody>
+
+  <sensor>
+    <subtreelinvel name="torso_subtreelinvel" body="torso"/>
+  </sensor>
+
+  <actuator>
+    <motor name="right_hip" joint="right_hip" gear="100"/>
+    <motor name="right_knee" joint="right_knee" gear="50"/>
+    <motor name="right_ankle" joint="right_ankle" gear="20"/>
+    <motor name="left_hip" joint="left_hip" gear="100"/>
+    <motor name="left_knee" joint="left_knee" gear="50"/>
+    <motor name="left_ankle" joint="left_ankle" gear="20"/>
+  </actuator>
+</mujoco>
--- a/DPI/local_dm_control_suite/wrappers/init.py
+++ b/DPI/local_dm_control_suite/wrappers/init.py
@ -0,0 +1,16 @@
+# Copyright 2018 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Environment wrappers used to extend or modify environment behaviour."""
--- a/DPI/local_dm_control_suite/wrappers/action_noise.py
+++ b/DPI/local_dm_control_suite/wrappers/action_noise.py
@ -0,0 +1,74 @@
+# Copyright 2018 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Wrapper control suite environments that adds Gaussian noise to actions."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import dm_env
+import numpy as np
+
+
+_BOUNDS_MUST_BE_FINITE = (
+    'All bounds in `env.action_spec()` must be finite, got: {action_spec}')
+
+
+class Wrapper(dm_env.Environment):
+  """Wraps a control environment and adds Gaussian noise to actions."""
+
+  def __init__(self, env, scale=0.01):
+    """Initializes a new action noise Wrapper.
+
+    Args:
+      env: The control suite environment to wrap.
+      scale: The standard deviation of the noise, expressed as a fraction
+        of the max-min range for each action dimension.
+
+    Raises:
+      ValueError: If any of the action dimensions of the wrapped environment are
+        unbounded.
+    """
+    action_spec = env.action_spec()
+    if not (np.all(np.isfinite(action_spec.minimum)) and
+            np.all(np.isfinite(action_spec.maximum))):
+      raise ValueError(_BOUNDS_MUST_BE_FINITE.format(action_spec=action_spec))
+    self._minimum = action_spec.minimum
+    self._maximum = action_spec.maximum
+    self._noise_std = scale * (action_spec.maximum - action_spec.minimum)
+    self._env = env
+
+  def step(self, action):
+    noisy_action = action + self._env.task.random.normal(scale=self._noise_std)
+    # Clip the noisy actions in place so that they fall within the bounds
+    # specified by the `action_spec`. Note that MuJoCo implicitly clips out-of-
+    # bounds control inputs, but we also clip here in case the actions do not
+    # correspond directly to MuJoCo actuators, or if there are other wrapper
+    # layers that expect the actions to be within bounds.
+    np.clip(noisy_action, self._minimum, self._maximum, out=noisy_action)
+    return self._env.step(noisy_action)
+
+  def reset(self):
+    return self._env.reset()
+
+  def observation_spec(self):
+    return self._env.observation_spec()
+
+  def action_spec(self):
+    return self._env.action_spec()
+
+  def __getattr__(self, name):
+    return getattr(self._env, name)
--- a/DPI/local_dm_control_suite/wrappers/action_noise_test.py
+++ b/DPI/local_dm_control_suite/wrappers/action_noise_test.py
@ -0,0 +1,136 @@
+# Copyright 2018 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Tests for the action noise wrapper."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+# Internal dependencies.
+from absl.testing import absltest
+from absl.testing import parameterized
+from dm_control.rl import control
+from dm_control.suite.wrappers import action_noise
+from dm_env import specs
+import mock
+import numpy as np
+
+
+class ActionNoiseTest(parameterized.TestCase):
+
+  def make_action_spec(self, lower=(-1.,), upper=(1.,)):
+    lower, upper = np.broadcast_arrays(lower, upper)
+    return specs.BoundedArray(
+        shape=lower.shape, dtype=float, minimum=lower, maximum=upper)
+
+  def make_mock_env(self, action_spec=None):
+    action_spec = action_spec or self.make_action_spec()
+    env = mock.Mock(spec=control.Environment)
+    env.action_spec.return_value = action_spec
+    return env
+
+  def assertStepCalledOnceWithCorrectAction(self, env, expected_action):
+    # NB: `assert_called_once_with()` doesn't support numpy arrays.
+    env.step.assert_called_once()
+    actual_action = env.step.call_args_list[0][0][0]
+    np.testing.assert_array_equal(expected_action, actual_action)
+
+  @parameterized.parameters([
+      dict(lower=np.r_[-1., 0.], upper=np.r_[1., 2.], scale=0.05),
+      dict(lower=np.r_[-1., 0.], upper=np.r_[1., 2.], scale=0.),
+      dict(lower=np.r_[-1., 0.], upper=np.r_[-1., 0.], scale=0.05),
+  ])
+  def test_step(self, lower, upper, scale):
+    seed = 0
+    std = scale * (upper - lower)
+    expected_noise = np.random.RandomState(seed).normal(scale=std)
+    action = np.random.RandomState(seed).uniform(lower, upper)
+    expected_noisy_action = np.clip(action + expected_noise, lower, upper)
+    task = mock.Mock(spec=control.Task)
+    task.random = np.random.RandomState(seed)
+    action_spec = self.make_action_spec(lower=lower, upper=upper)
+    env = self.make_mock_env(action_spec=action_spec)
+    env.task = task
+    wrapped_env = action_noise.Wrapper(env, scale=scale)
+    time_step = wrapped_env.step(action)
+    self.assertStepCalledOnceWithCorrectAction(env, expected_noisy_action)
+    self.assertIs(time_step, env.step(expected_noisy_action))
+
+  @parameterized.named_parameters([
+      dict(testcase_name='within_bounds', action=np.r_[-1.], noise=np.r_[0.1]),
+      dict(testcase_name='below_lower', action=np.r_[-1.], noise=np.r_[-0.1]),
+      dict(testcase_name='above_upper', action=np.r_[1.], noise=np.r_[0.1]),
+  ])
+  def test_action_clipping(self, action, noise):
+    lower = -1.
+    upper = 1.
+    expected_noisy_action = np.clip(action + noise, lower, upper)
+    task = mock.Mock(spec=control.Task)
+    task.random = mock.Mock(spec=np.random.RandomState)
+    task.random.normal.return_value = noise
+    action_spec = self.make_action_spec(lower=lower, upper=upper)
+    env = self.make_mock_env(action_spec=action_spec)
+    env.task = task
+    wrapped_env = action_noise.Wrapper(env)
+    time_step = wrapped_env.step(action)
+    self.assertStepCalledOnceWithCorrectAction(env, expected_noisy_action)
+    self.assertIs(time_step, env.step(expected_noisy_action))
+
+  @parameterized.parameters([
+      dict(lower=np.r_[-1., 0.], upper=np.r_[1., np.inf]),
+      dict(lower=np.r_[np.nan, 0.], upper=np.r_[1., 2.]),
+  ])
+  def test_error_if_action_bounds_non_finite(self, lower, upper):
+    action_spec = self.make_action_spec(lower=lower, upper=upper)
+    env = self.make_mock_env(action_spec=action_spec)
+    with self.assertRaisesWithLiteralMatch(
+        ValueError,
+        action_noise._BOUNDS_MUST_BE_FINITE.format(action_spec=action_spec)):
+      _ = action_noise.Wrapper(env)
+
+  def test_reset(self):
+    env = self.make_mock_env()
+    wrapped_env = action_noise.Wrapper(env)
+    time_step = wrapped_env.reset()
+    env.reset.assert_called_once_with()
+    self.assertIs(time_step, env.reset())
+
+  def test_observation_spec(self):
+    env = self.make_mock_env()
+    wrapped_env = action_noise.Wrapper(env)
+    observation_spec = wrapped_env.observation_spec()
+    env.observation_spec.assert_called_once_with()
+    self.assertIs(observation_spec, env.observation_spec())
+
+  def test_action_spec(self):
+    env = self.make_mock_env()
+    wrapped_env = action_noise.Wrapper(env)
+    # `env.action_spec()` is called in `Wrapper.__init__()`
+    env.action_spec.reset_mock()
+    action_spec = wrapped_env.action_spec()
+    env.action_spec.assert_called_once_with()
+    self.assertIs(action_spec, env.action_spec())
+
+  @parameterized.parameters(['task', 'physics', 'control_timestep'])
+  def test_getattr(self, attribute_name):
+    env = self.make_mock_env()
+    wrapped_env = action_noise.Wrapper(env)
+    attr = getattr(wrapped_env, attribute_name)
+    self.assertIs(attr, getattr(env, attribute_name))
+
+
+if __name__ == '__main__':
+  absltest.main()
--- a/DPI/local_dm_control_suite/wrappers/pixels.py
+++ b/DPI/local_dm_control_suite/wrappers/pixels.py
@ -0,0 +1,120 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Wrapper that adds pixel observations to a control environment."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+import dm_env
+from dm_env import specs
+
+STATE_KEY = 'state'
+
+
+class Wrapper(dm_env.Environment):
+  """Wraps a control environment and adds a rendered pixel observation."""
+
+  def __init__(self, env, pixels_only=True, render_kwargs=None,
+               observation_key='pixels'):
+    """Initializes a new pixel Wrapper.
+
+    Args:
+      env: The environment to wrap.
+      pixels_only: If True (default), the original set of 'state' observations
+        returned by the wrapped environment will be discarded, and the
+        `OrderedDict` of observations will only contain pixels. If False, the
+        `OrderedDict` will contain the original observations as well as the
+        pixel observations.
+      render_kwargs: Optional `dict` containing keyword arguments passed to the
+        `mujoco.Physics.render` method.
+      observation_key: Optional custom string specifying the pixel observation's
+        key in the `OrderedDict` of observations. Defaults to 'pixels'.
+
+    Raises:
+      ValueError: If `env`'s observation spec is not compatible with the
+        wrapper. Supported formats are a single array, or a dict of arrays.
+      ValueError: If `env`'s observation already contains the specified
+        `observation_key`.
+    """
+    if render_kwargs is None:
+      render_kwargs = {}
+
+    wrapped_observation_spec = env.observation_spec()
+
+    if isinstance(wrapped_observation_spec, specs.Array):
+      self._observation_is_dict = False
+      invalid_keys = set([STATE_KEY])
+    elif isinstance(wrapped_observation_spec, collections.MutableMapping):
+      self._observation_is_dict = True
+      invalid_keys = set(wrapped_observation_spec.keys())
+    else:
+      raise ValueError('Unsupported observation spec structure.')
+
+    if not pixels_only and observation_key in invalid_keys:
+      raise ValueError('Duplicate or reserved observation key {!r}.'
+                       .format(observation_key))
+
+    if pixels_only:
+      self._observation_spec = collections.OrderedDict()
+    elif self._observation_is_dict:
+      self._observation_spec = wrapped_observation_spec.copy()
+    else:
+      self._observation_spec = collections.OrderedDict()
+      self._observation_spec[STATE_KEY] = wrapped_observation_spec
+
+    # Extend observation spec.
+    pixels = env.physics.render(**render_kwargs)
+    pixels_spec = specs.Array(
+        shape=pixels.shape, dtype=pixels.dtype, name=observation_key)
+    self._observation_spec[observation_key] = pixels_spec
+
+    self._env = env
+    self._pixels_only = pixels_only
+    self._render_kwargs = render_kwargs
+    self._observation_key = observation_key
+
+  def reset(self):
+    time_step = self._env.reset()
+    return self._add_pixel_observation(time_step)
+
+  def step(self, action):
+    time_step = self._env.step(action)
+    return self._add_pixel_observation(time_step)
+
+  def observation_spec(self):
+    return self._observation_spec
+
+  def action_spec(self):
+    return self._env.action_spec()
+
+  def _add_pixel_observation(self, time_step):
+    if self._pixels_only:
+      observation = collections.OrderedDict()
+    elif self._observation_is_dict:
+      observation = type(time_step.observation)(time_step.observation)
+    else:
+      observation = collections.OrderedDict()
+      observation[STATE_KEY] = time_step.observation
+
+    pixels = self._env.physics.render(**self._render_kwargs)
+    observation[self._observation_key] = pixels
+    return time_step._replace(observation=observation)
+
+  def __getattr__(self, name):
+    return getattr(self._env, name)
--- a/DPI/local_dm_control_suite/wrappers/pixels_test.py
+++ b/DPI/local_dm_control_suite/wrappers/pixels_test.py
@ -0,0 +1,133 @@
+# Copyright 2017 The dm_control Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Tests for the pixel wrapper."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+# Internal dependencies.
+from absl.testing import absltest
+from absl.testing import parameterized
+from local_dm_control_suite import cartpole
+from dm_control.suite.wrappers import pixels
+import dm_env
+from dm_env import specs
+
+import numpy as np
+
+
+class FakePhysics(object):
+
+  def render(self, *args, **kwargs):
+    del args
+    del kwargs
+    return np.zeros((4, 5, 3), dtype=np.uint8)
+
+
+class FakeArrayObservationEnvironment(dm_env.Environment):
+
+  def __init__(self):
+    self.physics = FakePhysics()
+
+  def reset(self):
+    return dm_env.restart(np.zeros((2,)))
+
+  def step(self, action):
+    del action
+    return dm_env.transition(0.0, np.zeros((2,)))
+
+  def action_spec(self):
+    pass
+
+  def observation_spec(self):
+    return specs.Array(shape=(2,), dtype=np.float)
+
+
+class PixelsTest(parameterized.TestCase):
+
+  @parameterized.parameters(True, False)
+  def test_dict_observation(self, pixels_only):
+    pixel_key = 'rgb'
+
+    env = cartpole.swingup()
+
+    # Make sure we are testing the right environment for the test.
+    observation_spec = env.observation_spec()
+    self.assertIsInstance(observation_spec, collections.OrderedDict)
+
+    width = 320
+    height = 240
+
+    # The wrapper should only add one observation.
+    wrapped = pixels.Wrapper(env,
+                             observation_key=pixel_key,
+                             pixels_only=pixels_only,
+                             render_kwargs={'width': width, 'height': height})
+
+    wrapped_observation_spec = wrapped.observation_spec()
+    self.assertIsInstance(wrapped_observation_spec, collections.OrderedDict)
+
+    if pixels_only:
+      self.assertLen(wrapped_observation_spec, 1)
+      self.assertEqual([pixel_key], list(wrapped_observation_spec.keys()))
+    else:
+      expected_length = len(observation_spec) + 1
+      self.assertLen(wrapped_observation_spec, expected_length)
+      expected_keys = list(observation_spec.keys()) + [pixel_key]
+      self.assertEqual(expected_keys, list(wrapped_observation_spec.keys()))
+
+    # Check that the added spec item is consistent with the added observation.
+    time_step = wrapped.reset()
+    rgb_observation = time_step.observation[pixel_key]
+    wrapped_observation_spec[pixel_key].validate(rgb_observation)
+
+    self.assertEqual(rgb_observation.shape, (height, width, 3))
+    self.assertEqual(rgb_observation.dtype, np.uint8)
+
+  @parameterized.parameters(True, False)
+  def test_single_array_observation(self, pixels_only):
+    pixel_key = 'depth'
+
+    env = FakeArrayObservationEnvironment()
+    observation_spec = env.observation_spec()
+    self.assertIsInstance(observation_spec, specs.Array)
+
+    wrapped = pixels.Wrapper(env, observation_key=pixel_key,
+                             pixels_only=pixels_only)
+    wrapped_observation_spec = wrapped.observation_spec()
+    self.assertIsInstance(wrapped_observation_spec, collections.OrderedDict)
+
+    if pixels_only:
+      self.assertLen(wrapped_observation_spec, 1)
+      self.assertEqual([pixel_key], list(wrapped_observation_spec.keys()))
+    else:
+      self.assertLen(wrapped_observation_spec, 2)
+      self.assertEqual([pixels.STATE_KEY, pixel_key],
+                       list(wrapped_observation_spec.keys()))
+
+    time_step = wrapped.reset()
+
+    depth_observation = time_step.observation[pixel_key]
+    wrapped_observation_spec[pixel_key].validate(depth_observation)
+
+    self.assertEqual(depth_observation.shape, (4, 5, 3))
+    self.assertEqual(depth_observation.dtype, np.uint8)
+
+if __name__ == '__main__':
+  absltest.main()
--- a/DPI/log/video/999.mp4
+++ b/DPI/log/video/999.mp4
--- a/DPI/logger.py
+++ b/DPI/logger.py
@ -0,0 +1,170 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+# All rights reserved.
+
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+from torch.utils.tensorboard import SummaryWriter
+from collections import defaultdict
+import json
+import os
+import shutil
+import torch
+import torchvision
+import numpy as np
+from termcolor import colored
+
+FORMAT_CONFIG = {
+    'rl': {
+        'train': [
+            ('episode', 'E', 'int'), ('step', 'S', 'int'), 
+            ('duration', 'D', 'time'), ('episode_reward', 'R', 'float'),
+            ('batch_reward', 'BR', 'float'), ('actor_loss', 'ALOSS', 'float'),
+            ('critic_loss', 'CLOSS', 'float'), ('ae_loss', 'RLOSS', 'float'),
+            ('max_rat', 'MR', 'float')
+        ],
+        'eval': [('step', 'S', 'int'), ('episode_reward', 'ER', 'float')]
+    }
+}
+
+
+class AverageMeter(object):
+    def __init__(self):
+        self._sum = 0
+        self._count = 0
+
+    def update(self, value, n=1):
+        self._sum += value
+        self._count += n
+
+    def value(self):
+        return self._sum / max(1, self._count)
+
+
+class MetersGroup(object):
+    def __init__(self, file_name, formating):
+        self._file_name = file_name
+        if os.path.exists(file_name):
+            os.remove(file_name)
+        self._formating = formating
+        self._meters = defaultdict(AverageMeter)
+
+    def log(self, key, value, n=1):
+        self._meters[key].update(value, n)
+
+    def _prime_meters(self):
+        data = dict()
+        for key, meter in self._meters.items():
+            if key.startswith('train'):
+                key = key[len('train') + 1:]
+            else:
+                key = key[len('eval') + 1:]
+            key = key.replace('/', '_')
+            data[key] = meter.value()
+        return data
+
+    def _dump_to_file(self, data):
+        with open(self._file_name, 'a') as f:
+            f.write(json.dumps(data) + '\n')
+
+    def _format(self, key, value, ty):
+        template = '%s: '
+        if ty == 'int':
+            template += '%d'
+        elif ty == 'float':
+            template += '%.04f'
+        elif ty == 'time':
+            template += '%.01f s'
+        else:
+            raise 'invalid format type: %s' % ty
+        return template % (key, value)
+
+    def _dump_to_console(self, data, prefix):
+        prefix = colored(prefix, 'yellow' if prefix == 'train' else 'green')
+        pieces = ['{:5}'.format(prefix)]
+        for key, disp_key, ty in self._formating:
+            value = data.get(key, 0)
+            pieces.append(self._format(disp_key, value, ty))
+        print('| %s' % (' | '.join(pieces)))
+
+    def dump(self, step, prefix):
+        if len(self._meters) == 0:
+            return
+        data = self._prime_meters()
+        data['step'] = step
+        self._dump_to_file(data)
+        self._dump_to_console(data, prefix)
+        self._meters.clear()
+
+
+class Logger(object):
+    def __init__(self, log_dir, use_tb=True, config='rl'):
+        self._log_dir = log_dir
+        if use_tb:
+            tb_dir = os.path.join(log_dir, 'tb')
+            if os.path.exists(tb_dir):
+                shutil.rmtree(tb_dir)
+            self._sw = SummaryWriter(tb_dir)
+        else:
+            self._sw = None
+        self._train_mg = MetersGroup(
+            os.path.join(log_dir, 'train.log'),
+            formating=FORMAT_CONFIG[config]['train']
+        )
+        self._eval_mg = MetersGroup(
+            os.path.join(log_dir, 'eval.log'),
+            formating=FORMAT_CONFIG[config]['eval']
+        )
+
+    def _try_sw_log(self, key, value, step):
+        if self._sw is not None:
+            self._sw.add_scalar(key, value, step)
+
+    def _try_sw_log_image(self, key, image, step):
+        if self._sw is not None:
+            assert image.dim() == 3
+            grid = torchvision.utils.make_grid(image.unsqueeze(1))
+            self._sw.add_image(key, grid, step)
+
+    def _try_sw_log_video(self, key, frames, step):
+        if self._sw is not None:
+            frames = torch.from_numpy(np.array(frames))
+            frames = frames.unsqueeze(0)
+            self._sw.add_video(key, frames, step, fps=30)
+
+    def _try_sw_log_histogram(self, key, histogram, step):
+        if self._sw is not None:
+            self._sw.add_histogram(key, histogram, step)
+
+    def log(self, key, value, step, n=1):
+        assert key.startswith('train') or key.startswith('eval')
+        if type(value) == torch.Tensor:
+            value = value.item()
+        self._try_sw_log(key, value / n, step)
+        mg = self._train_mg if key.startswith('train') else self._eval_mg
+        mg.log(key, value, n)
+
+    def log_param(self, key, param, step):
+        self.log_histogram(key + '_w', param.weight.data, step)
+        if hasattr(param.weight, 'grad') and param.weight.grad is not None:
+            self.log_histogram(key + '_w_g', param.weight.grad.data, step)
+        if hasattr(param, 'bias'):
+            self.log_histogram(key + '_b', param.bias.data, step)
+            if hasattr(param.bias, 'grad') and param.bias.grad is not None:
+                self.log_histogram(key + '_b_g', param.bias.grad.data, step)
+
+    def log_image(self, key, image, step):
+        assert key.startswith('train') or key.startswith('eval')
+        self._try_sw_log_image(key, image, step)
+
+    def log_video(self, key, frames, step):
+        assert key.startswith('train') or key.startswith('eval')
+        self._try_sw_log_video(key, frames, step)
+
+    def log_histogram(self, key, histogram, step):
+        assert key.startswith('train') or key.startswith('eval')
+        self._try_sw_log_histogram(key, histogram, step)
+
+    def dump(self, step):
+        self._train_mg.dump(step, 'train')
+        self._eval_mg.dump(step, 'eval')
--- a/DPI/models.py
+++ b/DPI/models.py
@ -0,0 +1,183 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.distributions.normal import Normal
+
+
+class ObservationEncoder(nn.Module):
+    def __init__(self, obs_shape, state_size, num_layers=4, num_filters=32, stride=None):
+        super().__init__()  
+
+        assert len(obs_shape) == 3
+
+        self.state_size = state_size
+
+        layers = []
+        for i in range(num_layers):
+            input_channels = obs_shape[0] if i == 0 else output_channels
+            output_channels = num_filters * (2 ** i)
+            layers.append(nn.Conv2d(in_channels=input_channels, out_channels= output_channels, kernel_size=4, stride=2))
+            layers.append(nn.ReLU())
+
+        self.convs = nn.Sequential(*layers)
+
+        self.fc = nn.Linear(256 * 3 * 3, 2 * state_size)
+
+    def forward(self, x):
+        x = self.convs(x)
+        x = x.view(x.size(0), -1)
+        x = self.fc(x)
+        
+        # Mean and standard deviation
+        mean, std = torch.chunk(x, 2, dim=-1)
+        std = F.softplus(std)
+        std = torch.clamp(std, min=0.0, max=1e5)
+
+        # Normal Distribution
+        x = self.reparameterize(mean, std)
+        return x
+    
+    def reparameterize(self, mu, std):
+        eps = torch.randn_like(std)
+        return mu + eps * std
+
+
+class ObservationDecoder(nn.Module):
+    def __init__(self, state_size, output_shape):
+        super().__init__()
+
+        self.state_size = state_size
+        self.output_shape = output_shape
+        self.input_size = 256 * 3 * 3
+        self.in_channels = [self.input_size, 256, 128, 64]
+        self.out_channels = [256, 128, 64, 3]
+
+        if output_shape[1] == 84:
+            self.kernels = [5, 7, 5, 6]
+            self.output_padding = [1, 1, 1, 0]
+        elif output_shape[1] == 64:
+            self.kernels = [5, 5, 6, 6]
+            self.output_padding = [0, 0, 0, 0]
+
+        self.dense = nn.Linear(state_size, self.input_size)
+        
+        layers = []
+        for i in range(len(self.kernels)):
+            layers.append(nn.ConvTranspose2d(in_channels=self.in_channels[i], out_channels=self.out_channels[i], 
+                                             kernel_size=self.kernels[i], stride=2, output_padding=self.output_padding[i]))
+            if i!=len(self.kernels)-1:
+                layers.append(nn.ReLU())
+
+        self.convtranspose = nn.Sequential(*layers)
+        
+
+    def forward(self, features):
+        out_batch_shape = features.shape[:-1]
+        out = self.dense(features)
+        out = torch.reshape(out, [-1, self.input_size, 1, 1])
+        out = self.convtranspose(out)
+        
+        mean = torch.reshape(out, (*out_batch_shape, *self.output_shape))
+        out_dist = torch.distributions.independent.Independent(torch.distributions.Normal(mean, 1), len(self.output_shape))
+        return out_dist
+
+
+class TransitionModel(nn.Module):
+    def __init__(self, state_size, hidden_size, action_size, history_size):
+        super().__init__()
+
+        self.state_size = state_size
+        self.hidden_size = hidden_size
+        self.action_size = action_size
+        self.history_size = history_size
+        self.act_fn = nn.ReLU()
+
+        self.fc_state_action = nn.Linear(state_size + action_size, hidden_size)
+        self.history_cell = nn.GRUCell(hidden_size + history_size, history_size)
+        self.fc_state_prior  = nn.Linear(history_size + state_size + action_size, 2 * state_size)
+        self.fc_state_posterior = nn.Linear(history_size + state_size + action_size, 2 * state_size)
+
+    def init_states(self, batch_size, device):
+        self.prev_state = torch.zeros(batch_size, self.state_size).to(device)
+        self.prev_action = torch.zeros(batch_size, self.action_size).to(device)
+        self.prev_history = torch.zeros(batch_size, self.history_size).to(device)
+            
+
+    def get_dist(self, mean, std):
+        distribution = torch.distributions.Normal(mean, std)
+        distribution = torch.distributions.independent.Independent(distribution, 1)
+        return distribution
+
+    def imagine_step(self, prev_state, prev_action, prev_history):
+        state_action = self.act_fn(self.fc_state_action(torch.cat([prev_state, prev_action], dim=-1)))
+        history = self.history_cell(torch.cat([state_action, prev_history], dim=-1), prev_history)
+
+        state_prior = self.fc_state_prior(torch.cat([history, prev_state, prev_action], dim=-1))
+        state_prior_mean, state_prior_std = torch.chunk(state_prior, 2, dim=-1)
+        state_prior_std = F.softplus(state_prior_std)
+
+        sample_state_prior = self.reparemeterize(state_prior_mean, state_prior_std)
+        prior = {"mean": state_prior_mean, "std": state_prior_std, "sample": sample_state_prior, "history": history}
+        return prior
+    
+    def reparemeterize(self, mean, std):
+        eps = torch.randn_like(std)
+        return mean + eps * std
+    
+
+class CLUBSample(nn.Module):  # Sampled version of the CLUB estimator
+    def __init__(self, x_dim, y_dim, hidden_size):
+        super(CLUBSample, self).__init__()
+        self.p_mu = nn.Sequential(nn.Linear(x_dim, hidden_size//2),
+                                       nn.ReLU(),
+                                       nn.Linear(hidden_size//2, y_dim))
+
+        self.p_logvar = nn.Sequential(nn.Linear(x_dim, hidden_size//2),
+                                       nn.ReLU(),
+                                       nn.Linear(hidden_size//2, y_dim),
+                                       nn.Tanh())
+
+    def get_mu_logvar(self, x_samples):
+        mu = self.p_mu(x_samples)
+        logvar = self.p_logvar(x_samples)
+        return mu, logvar
+     
+        
+    def loglikeli(self, x_samples, y_samples):
+        mu, logvar = self.get_mu_logvar(x_samples)
+        return (-(mu - y_samples)**2 /logvar.exp()-logvar).sum(dim=1).mean(dim=0)
+    
+
+    def forward(self, x_samples, y_samples):
+        mu, logvar = self.get_mu_logvar(x_samples)
+        
+        sample_size = x_samples.shape[0]
+        #random_index = torch.randint(sample_size, (sample_size,)).long()
+        random_index = torch.randperm(sample_size).long()
+        
+        positive = - (mu - y_samples)**2 / logvar.exp()
+        negative = - (mu - y_samples[random_index])**2 / logvar.exp()
+        upper_bound = (positive.sum(dim = -1) - negative.sum(dim = -1)).mean()
+        return upper_bound/2.
+
+    def learning_loss(self, x_samples, y_samples):
+        return - self.loglikeli(x_samples, y_samples)
+    
+if __name__ == "__main__":
+    encoder = ObservationEncoder((12,84,84), 256)
+    x = torch.randn(100, 12, 84, 84)
+    print(encoder(x).shape)
+
+    club = CLUBSample(256, 256 , 512)
+    x = torch.randn(100, 256)
+    y = torch.randn(100, 256)
+    print(club.learning_loss(x, y))
+
+    x = torch.randn(100, 12, 84, 84)
+    y = torch.randn(100, 12, 84, 84)
+    x_enc = encoder(x)
+    y_enc = encoder(y)
+    print(x_enc.shape)
+    print(y_enc.shape)
+    print(club.learning_loss(x_enc, y_enc))
+
--- a/DPI/train.py
+++ b/DPI/train.py
@ -0,0 +1,196 @@
+import numpy as np
+import torch
+import argparse
+import os
+import gym
+import time
+import json
+import dmc2gym
+
+import wandb
+import utils
+from utils import ReplayBuffer, make_env
+from models import ObservationEncoder, ObservationDecoder, TransitionModel, CLUBSample
+from logger import Logger
+from video import VideoRecorder
+
+#from agent.baseline_agent import BaselineAgent
+#from agent.bisim_agent import BisimAgent
+#from agent.deepmdp_agent import DeepMDPAgent
+#from agents.navigation.carla_env import CarlaEnv
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    # environment
+    parser.add_argument('--domain_name', default='cheetah')
+    parser.add_argument('--task_name', default='run')
+    parser.add_argument('--image_size', default=84, type=int)
+    parser.add_argument('--channels', default=3, type=int)
+    parser.add_argument('--action_repeat', default=1, type=int)
+    parser.add_argument('--frame_stack', default=4, type=int)
+    parser.add_argument('--resource_files', type=str)
+    parser.add_argument('--eval_resource_files', type=str)
+    parser.add_argument('--img_source', default=None, type=str, choices=['color', 'noise', 'images', 'video', 'none'])
+    parser.add_argument('--total_frames', default=1000, type=int)
+    # replay buffer
+    parser.add_argument('--replay_buffer_capacity', default=100000, type=int)
+    parser.add_argument('--episode_length', default=1000, type=int)
+    # train
+    parser.add_argument('--agent', default='dpi', type=str, choices=['baseline', 'bisim', 'deepmdp', 'db', 'dpi', 'rpc'])
+    parser.add_argument('--init_steps', default=1000, type=int)
+    parser.add_argument('--num_train_steps', default=1000, type=int)
+    parser.add_argument('--batch_size', default=512, type=int)
+    parser.add_argument('--state_size', default=256, type=int)
+    parser.add_argument('--hidden_size', default=128, type=int)
+    parser.add_argument('--history_size', default=128, type=int)
+    parser.add_argument('--k', default=3, type=int, help='number of steps for inverse model')
+    parser.add_argument('--load_encoder', default=None, type=str)
+    # eval
+    parser.add_argument('--eval_freq', default=10, type=int)  # TODO: master had 10000
+    parser.add_argument('--num_eval_episodes', default=20, type=int)
+    # critic
+    parser.add_argument('--critic_lr', default=1e-3, type=float)
+    parser.add_argument('--critic_beta', default=0.9, type=float)
+    parser.add_argument('--critic_tau', default=0.005, type=float)
+    parser.add_argument('--critic_target_update_freq', default=2, type=int)
+    # actor
+    parser.add_argument('--actor_lr', default=1e-3, type=float)
+    parser.add_argument('--actor_beta', default=0.9, type=float)
+    parser.add_argument('--actor_log_std_min', default=-10, type=float)
+    parser.add_argument('--actor_log_std_max', default=2, type=float)
+    parser.add_argument('--actor_update_freq', default=2, type=int)
+    # encoder/decoder
+    parser.add_argument('--encoder_type', default='pixel', type=str, choices=['pixel', 'pixelCarla096', 'pixelCarla098', 'identity'])
+    parser.add_argument('--encoder_feature_dim', default=50, type=int)
+    parser.add_argument('--encoder_lr', default=1e-3, type=float)
+    parser.add_argument('--encoder_tau', default=0.005, type=float)
+    parser.add_argument('--encoder_stride', default=1, type=int)
+    parser.add_argument('--decoder_type', default='pixel', type=str, choices=['pixel', 'identity', 'contrastive', 'reward', 'inverse', 'reconstruction'])
+    parser.add_argument('--decoder_lr', default=1e-3, type=float)
+    parser.add_argument('--decoder_update_freq', default=1, type=int)
+    parser.add_argument('--decoder_weight_lambda', default=0.0, type=float)
+    parser.add_argument('--num_layers', default=4, type=int)
+    parser.add_argument('--num_filters', default=32, type=int)
+    # sac
+    parser.add_argument('--discount', default=0.99, type=float)
+    parser.add_argument('--init_temperature', default=0.01, type=float)
+    parser.add_argument('--alpha_lr', default=1e-3, type=float)
+    parser.add_argument('--alpha_beta', default=0.9, type=float)
+    # misc
+    parser.add_argument('--seed', default=1, type=int)
+    parser.add_argument('--seed_steps', default=5000, type=int)
+    parser.add_argument('--work_dir', default='.', type=str)
+    parser.add_argument('--save_tb', default=False, action='store_true')
+    parser.add_argument('--save_model', default=False, action='store_true')
+    parser.add_argument('--save_buffer', default=False, action='store_true')
+    parser.add_argument('--save_video', default=False, action='store_true')
+    parser.add_argument('--transition_model_type', default='', type=str, choices=['', 'deterministic', 'probabilistic', 'ensemble'])
+    parser.add_argument('--render', default=False, action='store_true')
+    parser.add_argument('--port', default=2000, type=int)
+    args = parser.parse_args()
+    return args
+
+
+
+
+
+
+
+class DPI:
+    def __init__(self, args):
+        # wandb config
+        #run = wandb.init(project="dpi")
+    
+        self.args = args
+
+        # environment setup
+        self.env = make_env(self.args)
+        self.env.seed(self.args.seed)
+
+        # stack several consecutive frames together
+        if self.args.encoder_type.startswith('pixel'):
+            self.env = utils.FrameStack(self.env, k=self.args.frame_stack)
+
+        # create replay buffer
+        self.data_buffer = ReplayBuffer(size=self.args.replay_buffer_capacity, 
+                                        obs_shape=(self.args.frame_stack*self.args.channels,self.args.image_size,self.args.image_size),
+                                        action_size=self.env.action_space.shape[0], 
+                                        seq_len=self.args.episode_length, 
+                                        batch_size=args.batch_size)
+
+        # create work directory
+        utils.make_dir(self.args.work_dir)
+        video_dir = utils.make_dir(os.path.join(self.args.work_dir, 'video'))
+        model_dir = utils.make_dir(os.path.join(self.args.work_dir, 'model'))
+        buffer_dir = utils.make_dir(os.path.join(self.args.work_dir, 'buffer'))
+
+        # create video recorder
+        #video = VideoRecorder(video_dir if args.save_video else None, resource_files=args.resource_files)
+        #video.init(enabled=True)
+
+        # create models
+        self.build_models(use_saved=False, saved_model_dir=model_dir)
+
+    def build_models(self, use_saved, saved_model_dir=None):
+        self.obs_encoder = ObservationEncoder(
+                            obs_shape=(self.args.frame_stack*self.args.channels,self.args.image_size,self.args.image_size), # (12,84,84)
+                            state_size=self.args.state_size # 128
+                            )
+
+        self.obs_decoder = ObservationDecoder(
+                            state_size=self.args.state_size, # 128
+                            output_shape=(self.args.frame_stack*self.args.channels,self.args.image_size,self.args.image_size) # (12,84,84)
+                            )
+
+        self.transition_model = TransitionModel(
+                            state_size=self.args.state_size, # 128
+                            hidden_size=self.args.hidden_size, # 256
+                            action_size=self.env.action_space.shape[0], # 6
+                            history_size=self.args.history_size, # 128
+                            )
+
+        # model parameters
+        self.model_parameters = list(self.obs_encoder.parameters()) + list(self.obs_decoder.parameters()) + list(self.transition_model.parameters())
+
+        # optimizer
+        self.optimizer = torch.optim.Adam(self.model_parameters, lr=self.args.encoder_lr)
+
+        if use_saved:
+            self._use_saved_models(saved_model_dir)
+
+    def _use_saved_models(self, saved_model_dir):
+        self.obs_encoder.load_state_dict(torch.load(os.path.join(saved_model_dir, 'obs_encoder.pt')))
+        self.obs_decoder.load_state_dict(torch.load(os.path.join(saved_model_dir, 'obs_decoder.pt')))
+        self.transition_model.load_state_dict(torch.load(os.path.join(saved_model_dir, 'transition_model.pt')))
+
+    def collect_random_episodes(self, episodes):
+        obs = self.env.reset()
+        done = False
+        
+        for episode_count in range(episodes):
+            for i in range(self.args.episode_length):
+                action = self.env.action_space.sample()
+                next_obs, _, done, _ = self.env.step(action)
+
+                self.data_buffer.add(obs, action, next_obs, episode_count+1, done)
+                if done:
+                    obs = self.env.reset()
+                    done=False  
+                else:
+                    obs = next_obs
+        print("Collected {} random episodes".format(episode_count+1))
+                #if args.save_video:
+                #    video.record(env)
+        #video.save('%d.mp4' % step)     
+        #video.close()
+
+    def upper_bound_minimization(self):
+        pass
+
+
+if __name__ == '__main__':
+    args = parse_args()
+
+    dpi = DPI(args)
+    dpi.collect_random_episodes(episodes=5)
--- a/DPI/utils.py
+++ b/DPI/utils.py
@ -0,0 +1,170 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+# All rights reserved.
+
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os
+import torch
+import numpy as np
+import torch.nn as nn
+
+import gym
+import dmc2gym
+
+import random
+from collections import deque
+
+
+class eval_mode(object):
+    def __init__(self, *models):
+        self.models = models
+
+    def __enter__(self):
+        self.prev_states = []
+        for model in self.models:
+            self.prev_states.append(model.training)
+            model.train(False)
+
+    def __exit__(self, *args):
+        for model, state in zip(self.models, self.prev_states):
+            model.train(state)
+        return False
+
+
+def soft_update_params(net, target_net, tau):
+    for param, target_param in zip(net.parameters(), target_net.parameters()):
+        target_param.data.copy_(
+            tau * param.data + (1 - tau) * target_param.data
+        )
+
+
+def set_seed_everywhere(seed):
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed_all(seed)
+    np.random.seed(seed)
+    random.seed(seed)
+
+
+def module_hash(module):
+    result = 0
+    for tensor in module.state_dict().values():
+        result += tensor.sum().item()
+    return result
+
+
+def make_dir(dir_path):
+    try:
+        os.mkdir(dir_path)
+    except OSError:
+        pass
+    return dir_path
+
+
+def preprocess_obs(obs, bits=5):
+    """Preprocessing image, see https://arxiv.org/abs/1807.03039."""
+    bins = 2**bits
+    assert obs.dtype == torch.float32
+    if bits < 8:
+        obs = torch.floor(obs / 2**(8 - bits))
+    obs = obs / bins
+    obs = obs + torch.rand_like(obs) / bins
+    obs = obs - 0.5
+    return obs
+
+
+class FrameStack(gym.Wrapper):
+    def __init__(self, env, k):
+        gym.Wrapper.__init__(self, env)
+        self._k = k
+        self._frames = deque([], maxlen=k)
+        shp = env.observation_space.shape
+        self.observation_space = gym.spaces.Box(
+            low=0,
+            high=1,
+            shape=((shp[0] * k,) + shp[1:]),
+            dtype=env.observation_space.dtype
+        )
+        self._max_episode_steps = env._max_episode_steps
+
+    def reset(self):
+        obs = self.env.reset()
+        for _ in range(self._k):
+            self._frames.append(obs)
+        return self._get_obs()
+
+    def step(self, action):
+        obs, reward, done, info = self.env.step(action)
+        self._frames.append(obs)
+        return self._get_obs(), reward, done, info
+
+    def _get_obs(self):
+        assert len(self._frames) == self._k
+        return np.concatenate(list(self._frames), axis=0)
+
+
+class ReplayBuffer:
+    def __init__(self, size, obs_shape, action_size, seq_len, batch_size):
+        self.size = size
+        self.obs_shape = obs_shape
+        self.action_size = action_size
+        self.seq_len = seq_len
+        self.batch_size = batch_size
+        self.idx = 0
+        self.full = False
+        self.observations = np.empty((size, *obs_shape), dtype=np.uint8)
+        self.actions = np.empty((size, action_size), dtype=np.float32)
+        self.next_observations = np.empty((size, *obs_shape), dtype=np.uint8)
+        self.episode_count = np.zeros((size,), dtype=np.uint8) 
+        self.terminals = np.empty((size,), dtype=np.float32)
+        self.steps, self.episodes = 0, 0
+    
+    def add(self, obs, ac, next_obs, episode_count, done):
+        self.observations[self.idx] = obs
+        self.actions[self.idx] = ac
+        self.next_observations[self.idx] = next_obs
+        self.episode_count[self.idx] = episode_count
+        self.terminals[self.idx] = done
+        self.idx = (self.idx + 1) % self.size
+        self.full = self.full or self.idx == 0
+        self.steps += 1 
+        self.episodes = self.episodes + (1 if done else 0)
+
+    def _sample_idx(self, L):
+        valid_idx = False
+        while not valid_idx:
+            idx = np.random.randint(0, self.size if self.full else self.idx - L)
+            idxs = np.arange(idx, idx + L) % self.size
+            valid_idx = not self.idx in idxs[1:] 
+        return idxs
+
+    def _retrieve_batch(self, idxs, n, L):
+        vec_idxs = idxs.transpose().reshape(-1)  # Unroll indices
+        observations = self.observations[vec_idxs]
+        next_observations = self.next_observations[vec_idxs]
+        return observations.reshape(L, n, *observations.shape[1:]), self.actions[vec_idxs].reshape(L, n, -1), observations.reshape(L, n, *next_observations.shape[1:]), \
+                self.rewards[vec_idxs].reshape(L, n), self.terminals[vec_idxs].reshape(L, n)
+
+    def sample(self):
+        n = self.batch_size
+        l = self.seq_len
+        obs,acs,rews,terms= self._retrieve_batch(np.asarray([self._sample_idx(l) for _ in range(n)]), n, l)
+        return obs,acs,rews,terms
+    
+
+def make_env(args):
+    env = dmc2gym.make(
+            domain_name=args.domain_name,
+            task_name=args.task_name,
+            resource_files=args.resource_files,
+            img_source=args.img_source,
+            total_frames=args.total_frames,
+            seed=args.seed,
+            visualize_reward=False,
+            from_pixels=(args.encoder_type == 'pixel'),
+            height=args.image_size,
+            width=args.image_size,
+            frame_skip=args.action_repeat
+        )
+    return env
--- a/DPI/video.py
+++ b/DPI/video.py
@ -0,0 +1,60 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+# All rights reserved.
+
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+import imageio
+import os
+import numpy as np
+import glob
+
+from dmc2gym.natural_imgsource import RandomVideoSource
+
+
+class VideoRecorder(object):
+    def __init__(self, dir_name, resource_files=None, height=256, width=256, camera_id=0, fps=30):
+        self.dir_name = dir_name
+        self.height = height
+        self.width = width
+        self.camera_id = camera_id
+        self.fps = fps
+        self.frames = []
+        self.resource_files = resource_files
+        if resource_files:
+            files = glob.glob(os.path.expanduser(resource_files))
+            self._bg_source = RandomVideoSource((height, width), files, grayscale=False, total_frames=1000)
+        else:
+            self._bg_source = None
+
+    def init(self, enabled=True):
+        self.frames = []
+        self.enabled = self.dir_name is not None and enabled
+
+    def record(self, env):
+        if self.enabled:
+            frame = env.render(
+                mode='rgb_array',
+                height=self.height,
+                width=self.width,
+                camera_id=self.camera_id
+            )
+            if self._bg_source:
+                mask = np.logical_and((frame[:, :, 2] > frame[:, :, 1]), (frame[:, :, 2] > frame[:, :, 0]))  # hardcoded for dmc
+                bg = self._bg_source.get_image()
+                frame[mask] = bg[mask]
+            self.frames.append(frame)
+    
+    def record_from_obs(self, obs):
+        if self.enabled:
+            frame = obs
+            if self._bg_source:
+                mask = np.logical_and((frame[:, :, 2] > frame[:, :, 1]), (frame[:, :, 2] > frame[:, :, 0]))
+                bg = self._bg_source.get_image()
+                frame[mask] = bg[mask]
+            self.frames.append(frame.transpose(1, 2, 0))
+
+    def save(self, file_name):
+        if self.enabled:
+            path = os.path.join(self.dir_name, file_name)
+            imageio.mimsave(path, self.frames, fps=self.fps)