218 lines
7.8 KiB
Python
218 lines
7.8 KiB
Python
|
# Copyright 2017 The dm_control Authors.
|
||
|
#
|
||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
# you may not use this file except in compliance with the License.
|
||
|
# You may obtain a copy of the License at
|
||
|
#
|
||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||
|
#
|
||
|
# Unless required by applicable law or agreed to in writing, software
|
||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
# See the License for the specific language governing permissions and
|
||
|
# limitations under the License.
|
||
|
# ============================================================================
|
||
|
|
||
|
"""Finger Domain."""
|
||
|
|
||
|
from __future__ import absolute_import
|
||
|
from __future__ import division
|
||
|
from __future__ import print_function
|
||
|
|
||
|
import collections
|
||
|
|
||
|
from dm_control import mujoco
|
||
|
from dm_control.rl import control
|
||
|
from local_dm_control_suite import base
|
||
|
from local_dm_control_suite import common
|
||
|
from dm_control.suite.utils import randomizers
|
||
|
from dm_control.utils import containers
|
||
|
import numpy as np
|
||
|
from six.moves import range
|
||
|
|
||
|
_DEFAULT_TIME_LIMIT = 20 # (seconds)
|
||
|
_CONTROL_TIMESTEP = .02 # (seconds)
|
||
|
# For TURN tasks, the 'tip' geom needs to enter a spherical target of sizes:
|
||
|
_EASY_TARGET_SIZE = 0.07
|
||
|
_HARD_TARGET_SIZE = 0.03
|
||
|
# Initial spin velocity for the Stop task.
|
||
|
_INITIAL_SPIN_VELOCITY = 100
|
||
|
# Spinning slower than this value (radian/second) is considered stopped.
|
||
|
_STOP_VELOCITY = 1e-6
|
||
|
# Spinning faster than this value (radian/second) is considered spinning.
|
||
|
_SPIN_VELOCITY = 15.0
|
||
|
|
||
|
|
||
|
SUITE = containers.TaggedTasks()
|
||
|
|
||
|
|
||
|
def get_model_and_assets():
|
||
|
"""Returns a tuple containing the model XML string and a dict of assets."""
|
||
|
return common.read_model('finger.xml'), common.ASSETS
|
||
|
|
||
|
|
||
|
@SUITE.add('benchmarking')
|
||
|
def spin(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
|
||
|
"""Returns the Spin task."""
|
||
|
physics = Physics.from_xml_string(*get_model_and_assets())
|
||
|
task = Spin(random=random)
|
||
|
environment_kwargs = environment_kwargs or {}
|
||
|
return control.Environment(
|
||
|
physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
|
||
|
**environment_kwargs)
|
||
|
|
||
|
|
||
|
@SUITE.add('benchmarking')
|
||
|
def turn_easy(time_limit=_DEFAULT_TIME_LIMIT, random=None,
|
||
|
environment_kwargs=None):
|
||
|
"""Returns the easy Turn task."""
|
||
|
physics = Physics.from_xml_string(*get_model_and_assets())
|
||
|
task = Turn(target_radius=_EASY_TARGET_SIZE, random=random)
|
||
|
environment_kwargs = environment_kwargs or {}
|
||
|
return control.Environment(
|
||
|
physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
|
||
|
**environment_kwargs)
|
||
|
|
||
|
|
||
|
@SUITE.add('benchmarking')
|
||
|
def turn_hard(time_limit=_DEFAULT_TIME_LIMIT, random=None,
|
||
|
environment_kwargs=None):
|
||
|
"""Returns the hard Turn task."""
|
||
|
physics = Physics.from_xml_string(*get_model_and_assets())
|
||
|
task = Turn(target_radius=_HARD_TARGET_SIZE, random=random)
|
||
|
environment_kwargs = environment_kwargs or {}
|
||
|
return control.Environment(
|
||
|
physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
|
||
|
**environment_kwargs)
|
||
|
|
||
|
|
||
|
class Physics(mujoco.Physics):
|
||
|
"""Physics simulation with additional features for the Finger domain."""
|
||
|
|
||
|
def touch(self):
|
||
|
"""Returns logarithmically scaled signals from the two touch sensors."""
|
||
|
return np.log1p(self.named.data.sensordata[['touchtop', 'touchbottom']])
|
||
|
|
||
|
def hinge_velocity(self):
|
||
|
"""Returns the velocity of the hinge joint."""
|
||
|
return self.named.data.sensordata['hinge_velocity']
|
||
|
|
||
|
def tip_position(self):
|
||
|
"""Returns the (x,z) position of the tip relative to the hinge."""
|
||
|
return (self.named.data.sensordata['tip'][[0, 2]] -
|
||
|
self.named.data.sensordata['spinner'][[0, 2]])
|
||
|
|
||
|
def bounded_position(self):
|
||
|
"""Returns the positions, with the hinge angle replaced by tip position."""
|
||
|
return np.hstack((self.named.data.sensordata[['proximal', 'distal']],
|
||
|
self.tip_position()))
|
||
|
|
||
|
def velocity(self):
|
||
|
"""Returns the velocities (extracted from sensordata)."""
|
||
|
return self.named.data.sensordata[['proximal_velocity',
|
||
|
'distal_velocity',
|
||
|
'hinge_velocity']]
|
||
|
|
||
|
def target_position(self):
|
||
|
"""Returns the (x,z) position of the target relative to the hinge."""
|
||
|
return (self.named.data.sensordata['target'][[0, 2]] -
|
||
|
self.named.data.sensordata['spinner'][[0, 2]])
|
||
|
|
||
|
def to_target(self):
|
||
|
"""Returns the vector from the tip to the target."""
|
||
|
return self.target_position() - self.tip_position()
|
||
|
|
||
|
def dist_to_target(self):
|
||
|
"""Returns the signed distance to the target surface, negative is inside."""
|
||
|
return (np.linalg.norm(self.to_target()) -
|
||
|
self.named.model.site_size['target', 0])
|
||
|
|
||
|
|
||
|
class Spin(base.Task):
|
||
|
"""A Finger `Task` to spin the stopped body."""
|
||
|
|
||
|
def __init__(self, random=None):
|
||
|
"""Initializes a new `Spin` instance.
|
||
|
|
||
|
Args:
|
||
|
random: Optional, either a `numpy.random.RandomState` instance, an
|
||
|
integer seed for creating a new `RandomState`, or None to select a seed
|
||
|
automatically (default).
|
||
|
"""
|
||
|
super(Spin, self).__init__(random=random)
|
||
|
|
||
|
def initialize_episode(self, physics):
|
||
|
physics.named.model.site_rgba['target', 3] = 0
|
||
|
physics.named.model.site_rgba['tip', 3] = 0
|
||
|
physics.named.model.dof_damping['hinge'] = .03
|
||
|
_set_random_joint_angles(physics, self.random)
|
||
|
super(Spin, self).initialize_episode(physics)
|
||
|
|
||
|
def get_observation(self, physics):
|
||
|
"""Returns state and touch sensors, and target info."""
|
||
|
obs = collections.OrderedDict()
|
||
|
obs['position'] = physics.bounded_position()
|
||
|
obs['velocity'] = physics.velocity()
|
||
|
obs['touch'] = physics.touch()
|
||
|
return obs
|
||
|
|
||
|
def get_reward(self, physics):
|
||
|
"""Returns a sparse reward."""
|
||
|
return float(physics.hinge_velocity() <= -_SPIN_VELOCITY)
|
||
|
|
||
|
|
||
|
class Turn(base.Task):
|
||
|
"""A Finger `Task` to turn the body to a target angle."""
|
||
|
|
||
|
def __init__(self, target_radius, random=None):
|
||
|
"""Initializes a new `Turn` instance.
|
||
|
|
||
|
Args:
|
||
|
target_radius: Radius of the target site, which specifies the goal angle.
|
||
|
random: Optional, either a `numpy.random.RandomState` instance, an
|
||
|
integer seed for creating a new `RandomState`, or None to select a seed
|
||
|
automatically (default).
|
||
|
"""
|
||
|
self._target_radius = target_radius
|
||
|
super(Turn, self).__init__(random=random)
|
||
|
|
||
|
def initialize_episode(self, physics):
|
||
|
target_angle = self.random.uniform(-np.pi, np.pi)
|
||
|
hinge_x, hinge_z = physics.named.data.xanchor['hinge', ['x', 'z']]
|
||
|
radius = physics.named.model.geom_size['cap1'].sum()
|
||
|
target_x = hinge_x + radius * np.sin(target_angle)
|
||
|
target_z = hinge_z + radius * np.cos(target_angle)
|
||
|
physics.named.model.site_pos['target', ['x', 'z']] = target_x, target_z
|
||
|
physics.named.model.site_size['target', 0] = self._target_radius
|
||
|
|
||
|
_set_random_joint_angles(physics, self.random)
|
||
|
|
||
|
super(Turn, self).initialize_episode(physics)
|
||
|
|
||
|
def get_observation(self, physics):
|
||
|
"""Returns state, touch sensors, and target info."""
|
||
|
obs = collections.OrderedDict()
|
||
|
obs['position'] = physics.bounded_position()
|
||
|
obs['velocity'] = physics.velocity()
|
||
|
obs['touch'] = physics.touch()
|
||
|
obs['target_position'] = physics.target_position()
|
||
|
obs['dist_to_target'] = physics.dist_to_target()
|
||
|
return obs
|
||
|
|
||
|
def get_reward(self, physics):
|
||
|
return float(physics.dist_to_target() <= 0)
|
||
|
|
||
|
|
||
|
def _set_random_joint_angles(physics, random, max_attempts=1000):
|
||
|
"""Sets the joints to a random collision-free state."""
|
||
|
|
||
|
for _ in range(max_attempts):
|
||
|
randomizers.randomize_limited_and_rotational_joints(physics, random)
|
||
|
# Check for collisions.
|
||
|
physics.after_reset()
|
||
|
if physics.data.ncon == 0:
|
||
|
break
|
||
|
else:
|
||
|
raise RuntimeError('Could not find a collision-free state '
|
||
|
'after {} attempts'.format(max_attempts))
|