initial commit.
This commit is contained in:
commit 297b5bb62d
.gitignore (vendored, Normal file, +5)
@@ -0,0 +1,5 @@
__pycache__/
*.py[cod]
*.egg-info
./dist
MUJOCO_LOG.TXT
Dreamer/dmc2gym/__init__.py (Normal file, +52)
@@ -0,0 +1,52 @@
import gym
from gym.envs.registration import register


def make(
        domain_name,
        task_name,
        resource_files,
        img_source,
        total_frames,
        seed=1,
        visualize_reward=True,
        from_pixels=False,
        height=84,
        width=84,
        camera_id=0,
        frame_skip=1,
        episode_length=1000,
        environment_kwargs=None
):
    env_id = 'dmc_%s_%s_%s-v1' % (domain_name, task_name, seed)

    if from_pixels:
        assert not visualize_reward, 'cannot use visualize reward when learning from pixels'

    # shorten episode length
    max_episode_steps = (episode_length + frame_skip - 1) // frame_skip

    if not env_id in gym.envs.registry.env_specs:
        register(
            id=env_id,
            entry_point='dmc2gym.wrappers:DMCWrapper',
            kwargs={
                'domain_name': domain_name,
                'task_name': task_name,
                'resource_files': resource_files,
                'img_source': img_source,
                'total_frames': total_frames,
                'task_kwargs': {
                    'random': seed
                },
                'environment_kwargs': environment_kwargs,
                'visualize_reward': visualize_reward,
                'from_pixels': from_pixels,
                'height': height,
                'width': width,
                'camera_id': camera_id,
                'frame_skip': frame_skip,
            },
            # max_episode_steps=max_episode_steps
        )
    return gym.make(env_id)
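For reference, a minimal usage sketch of the `make` helper registered above. The domain/task names and the video glob are illustrative assumptions, not values defined by this commit:

```python
# Hypothetical usage of dmc2gym.make; 'cheetah'/'run' and the video glob
# are illustrative values, not part of this commit.
import dmc2gym

env = dmc2gym.make(
    domain_name='cheetah',
    task_name='run',
    resource_files='~/videos/*.mp4',   # assumed glob of background videos
    img_source='video',                # or 'noise', 'color', 'images', None
    total_frames=1000,
    seed=1,
    visualize_reward=False,            # must be False when from_pixels=True
    from_pixels=True,
    frame_skip=2,
)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
```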
Dreamer/dmc2gym/natural_imgsource.py (Normal file, +183)
@@ -0,0 +1,183 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import numpy as np
import cv2
import skvideo.io
import random
import tqdm


class BackgroundMatting(object):
    """
    Produce a mask by masking the given color. This is a simple strategy
    but effective for many games.
    """
    def __init__(self, color):
        """
        Args:
            color: a (r, g, b) tuple or single value for grayscale
        """
        self._color = color

    def get_mask(self, img):
        return img == self._color


class ImageSource(object):
    """
    Source of natural images to be added to a simulated environment.
    """
    def get_image(self):
        """
        Returns:
            an RGB image of [h, w, 3] with a fixed shape.
        """
        pass

    def reset(self):
        """ Called when an episode ends. """
        pass


class FixedColorSource(ImageSource):
    def __init__(self, shape, color):
        """
        Args:
            shape: [h, w]
            color: a 3-tuple
        """
        self.arr = np.zeros((shape[0], shape[1], 3))
        self.arr[:, :] = color

    def get_image(self):
        return self.arr


class RandomColorSource(ImageSource):
    def __init__(self, shape):
        """
        Args:
            shape: [h, w]
        """
        self.shape = shape
        self.arr = None
        self.reset()

    def reset(self):
        self._color = np.random.randint(0, 256, size=(3,))
        self.arr = np.zeros((self.shape[0], self.shape[1], 3))
        self.arr[:, :] = self._color

    def get_image(self):
        return self.arr


class NoiseSource(ImageSource):
    def __init__(self, shape, strength=255):
        """
        Args:
            shape: [h, w]
            strength (int): the strength of noise, in range [0, 255]
        """
        self.shape = shape
        self.strength = strength

    def get_image(self):
        return np.random.randn(self.shape[0], self.shape[1], 3) * self.strength


class RandomImageSource(ImageSource):
    def __init__(self, shape, filelist, total_frames=None, grayscale=False):
        """
        Args:
            shape: [h, w]
            filelist: a list of image files
        """
        self.grayscale = grayscale
        self.total_frames = total_frames
        self.shape = shape
        self.filelist = filelist
        self.build_arr()
        self.current_idx = 0
        self.reset()

    def build_arr(self):
        self.total_frames = self.total_frames if self.total_frames else len(self.filelist)
        self.arr = np.zeros((self.total_frames, self.shape[0], self.shape[1]) + ((3,) if not self.grayscale else (1,)))
        for i in range(self.total_frames):
            # if i % len(self.filelist) == 0: random.shuffle(self.filelist)
            fname = self.filelist[i % len(self.filelist)]
            if self.grayscale:
                im = cv2.imread(fname, cv2.IMREAD_GRAYSCALE)[..., None]
            else:
                im = cv2.imread(fname, cv2.IMREAD_COLOR)
            self.arr[i] = cv2.resize(im, (self.shape[1], self.shape[0]))  ## THIS IS NOT A BUG! cv2 uses (width, height)

    def reset(self):
        self._loc = np.random.randint(0, self.total_frames)

    def get_image(self):
        return self.arr[self._loc]


class RandomVideoSource(ImageSource):
    def __init__(self, shape, filelist, total_frames=None, grayscale=False):
        """
        Args:
            shape: [h, w]
            filelist: a list of video files
        """
        self.grayscale = grayscale
        self.total_frames = total_frames
        self.shape = shape
        self.filelist = filelist
        self.build_arr()
        self.current_idx = 0
        self.reset()

    def build_arr(self):
        if not self.total_frames:
            self.total_frames = 0
            self.arr = None
            random.shuffle(self.filelist)
            for fname in tqdm.tqdm(self.filelist, desc="Loading videos for natural", position=0):
                if self.grayscale:
                    frames = skvideo.io.vread(fname, outputdict={"-pix_fmt": "gray"})
                else:
                    frames = skvideo.io.vread(fname)
                local_arr = np.zeros((frames.shape[0], self.shape[0], self.shape[1]) + ((3,) if not self.grayscale else (1,)))
                for i in tqdm.tqdm(range(frames.shape[0]), desc="video frames", position=1):
                    local_arr[i] = cv2.resize(frames[i], (self.shape[1], self.shape[0]))  ## THIS IS NOT A BUG! cv2 uses (width, height)
                if self.arr is None:
                    self.arr = local_arr
                else:
                    self.arr = np.concatenate([self.arr, local_arr], 0)
                self.total_frames += local_arr.shape[0]
        else:
            self.arr = np.zeros((self.total_frames, self.shape[0], self.shape[1]) + ((3,) if not self.grayscale else (1,)))
            total_frame_i = 0
            file_i = 0
            with tqdm.tqdm(total=self.total_frames, desc="Loading videos for natural") as pbar:
                while total_frame_i < self.total_frames:
                    if file_i % len(self.filelist) == 0:
                        random.shuffle(self.filelist)
                    file_i += 1
                    fname = self.filelist[file_i % len(self.filelist)]
                    if self.grayscale:
                        frames = skvideo.io.vread(fname, outputdict={"-pix_fmt": "gray"})
                    else:
                        frames = skvideo.io.vread(fname)
                    for frame_i in range(frames.shape[0]):
                        if total_frame_i >= self.total_frames:
                            break
                        if self.grayscale:
                            self.arr[total_frame_i] = cv2.resize(frames[frame_i], (self.shape[1], self.shape[0]))[..., None]  ## THIS IS NOT A BUG! cv2 uses (width, height)
                        else:
                            self.arr[total_frame_i] = cv2.resize(frames[frame_i], (self.shape[1], self.shape[0]))
                        pbar.update(1)
                        total_frame_i += 1

    def reset(self):
        self._loc = np.random.randint(0, self.total_frames)

    def get_image(self):
        img = self.arr[self._loc % self.total_frames]
        self._loc += 1
        return img
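A quick standalone sanity check of one of these sources; the 84x84 shape is an illustrative choice, and the import path assumes the package layout of this commit:

```python
# Minimal sketch exercising NoiseSource on its own; the shape is illustrative.
import numpy as np
from dmc2gym.natural_imgsource import NoiseSource

src = NoiseSource((84, 84), strength=255)
bg = src.get_image()
assert bg.shape == (84, 84, 3)   # Gaussian noise scaled by strength, not clipped to [0, 255]
src.reset()                      # inherited no-op; the image/video sources re-sample a frame index here
```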
Dreamer/dmc2gym/wrappers.py (Normal file, +201)
@@ -0,0 +1,201 @@
from gym import core, spaces
import glob
import os
import local_dm_control_suite as suite
from dm_env import specs
import numpy as np
import skimage.io

from dmc2gym import natural_imgsource


def _spec_to_box(spec):
    def extract_min_max(s):
        assert s.dtype == np.float64 or s.dtype == np.float32
        dim = np.int(np.prod(s.shape))
        if type(s) == specs.Array:
            bound = np.inf * np.ones(dim, dtype=np.float32)
            return -bound, bound
        elif type(s) == specs.BoundedArray:
            zeros = np.zeros(dim, dtype=np.float32)
            return s.minimum + zeros, s.maximum + zeros

    mins, maxs = [], []
    for s in spec:
        mn, mx = extract_min_max(s)
        mins.append(mn)
        maxs.append(mx)
    low = np.concatenate(mins, axis=0)
    high = np.concatenate(maxs, axis=0)
    assert low.shape == high.shape
    return spaces.Box(low, high, dtype=np.float32)


def _flatten_obs(obs):
    obs_pieces = []
    for v in obs.values():
        flat = np.array([v]) if np.isscalar(v) else v.ravel()
        obs_pieces.append(flat)
    return np.concatenate(obs_pieces, axis=0)


class DMCWrapper(core.Env):
    def __init__(
        self,
        domain_name,
        task_name,
        resource_files,
        img_source,
        total_frames,
        task_kwargs=None,
        visualize_reward={},
        from_pixels=False,
        height=84,
        width=84,
        camera_id=0,
        frame_skip=1,
        environment_kwargs=None
    ):
        assert 'random' in task_kwargs, 'please specify a seed, for deterministic behaviour'
        self._from_pixels = from_pixels
        self._height = height
        self._width = width
        self._camera_id = camera_id
        self._frame_skip = frame_skip
        self._img_source = img_source
        self._resource_files = resource_files

        # create task
        self._env = suite.load(
            domain_name=domain_name,
            task_name=task_name,
            task_kwargs=task_kwargs,
            visualize_reward=visualize_reward,
            environment_kwargs=environment_kwargs
        )

        # true and normalized action spaces
        self._true_action_space = _spec_to_box([self._env.action_spec()])
        self._norm_action_space = spaces.Box(
            low=-1.0,
            high=1.0,
            shape=self._true_action_space.shape,
            dtype=np.float32
        )

        # create observation space
        if from_pixels:
            self._observation_space = spaces.Box(
                low=0, high=255, shape=[3, height, width], dtype=np.uint8
            )
        else:
            self._observation_space = _spec_to_box(
                self._env.observation_spec().values()
            )

        self._internal_state_space = spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=self._env.physics.get_state().shape,
            dtype=np.float32
        )

        # background
        if img_source is not None:
            shape2d = (height, width)
            if img_source == "color":
                self._bg_source = natural_imgsource.RandomColorSource(shape2d)
            elif img_source == "noise":
                self._bg_source = natural_imgsource.NoiseSource(shape2d)
            else:
                files = glob.glob(os.path.expanduser(resource_files))
                assert len(files), "Pattern {} does not match any files".format(
                    resource_files
                )
                if img_source == "images":
                    self._bg_source = natural_imgsource.RandomImageSource(shape2d, files, grayscale=True, total_frames=total_frames)
                elif img_source == "video":
                    self._bg_source = natural_imgsource.RandomVideoSource(shape2d, files, grayscale=True, total_frames=total_frames)
                else:
                    raise Exception("img_source %s not defined." % img_source)

        # set seed
        self.seed(seed=task_kwargs.get('random', 1))

    def __getattr__(self, name):
        return getattr(self._env, name)

    def _get_obs(self, time_step):
        if self._from_pixels:
            obs = self.render(
                height=self._height,
                width=self._width,
                camera_id=self._camera_id
            )
            if self._img_source is not None:
                mask = np.logical_and((obs[:, :, 2] > obs[:, :, 1]), (obs[:, :, 2] > obs[:, :, 0]))  # hardcoded for dmc
                bg = self._bg_source.get_image()
                obs[mask] = bg[mask]
            # obs = obs.transpose(2, 0, 1).copy()
            # CHW to HWC for tensorflow
            obs = obs.copy()
        else:
            obs = _flatten_obs(time_step.observation)
        return obs

    def _convert_action(self, action):
        action = action.astype(np.float64)
        true_delta = self._true_action_space.high - self._true_action_space.low
        norm_delta = self._norm_action_space.high - self._norm_action_space.low
        action = (action - self._norm_action_space.low) / norm_delta
        action = action * true_delta + self._true_action_space.low
        action = action.astype(np.float32)
        return action

    @property
    def observation_space(self):
        return self._observation_space

    @property
    def internal_state_space(self):
        return self._internal_state_space

    @property
    def action_space(self):
        return self._norm_action_space

    def seed(self, seed):
        self._true_action_space.seed(seed)
        self._norm_action_space.seed(seed)
        self._observation_space.seed(seed)

    def step(self, action):
        assert self._norm_action_space.contains(action)
        action = self._convert_action(action)
        assert self._true_action_space.contains(action)
        reward = 0
        extra = {'internal_state': self._env.physics.get_state().copy()}

        for _ in range(self._frame_skip):
            time_step = self._env.step(action)
            reward += time_step.reward or 0
            done = time_step.last()
            if done:
                break
        obs = self._get_obs(time_step)
        extra['discount'] = time_step.discount
        return obs, reward, done, extra

    def reset(self):
        time_step = self._env.reset()
        obs = self._get_obs(time_step)
        return obs

    def render(self, mode='rgb_array', height=None, width=None, camera_id=0):
        assert mode == 'rgb_array', 'only support rgb_array mode, given %s' % mode
        height = height or self._height
        width = width or self._width
        camera_id = camera_id or self._camera_id
        return self._env.physics.render(
            height=height, width=width, camera_id=camera_id
        )
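The `_convert_action` method above is the standard per-dimension affine rescaling from the normalized space $[-1, 1]$ into the true action bounds $[a_{\min}, a_{\max}]$; written out:

$$a_{\text{true}} = a_{\min} + \frac{a_{\text{norm}} + 1}{2}\,(a_{\max} - a_{\min})$$

which is exactly what the two-step normalize-then-scale code computes.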
Dreamer/dreamers.py (Normal file, +739)
@@ -0,0 +1,739 @@
import tools
import models
from tensorflow_probability import distributions as tfd
from tensorflow.keras.mixed_precision import experimental as prec
import tensorflow as tf
import numpy as np
import collections
import functools
import json
import time

from env_tools import preprocess, count_steps


def load_dataset(directory, config):
    episode = next(tools.load_episodes(directory, 1))
    types = {k: v.dtype for k, v in episode.items()}
    shapes = {k: (None,) + v.shape[1:] for k, v in episode.items()}

    def generator(): return tools.load_episodes(
        directory, config.train_steps, config.batch_length,
        config.dataset_balance)
    dataset = tf.data.Dataset.from_generator(generator, types, shapes)
    dataset = dataset.batch(config.batch_size, drop_remainder=True)
    dataset = dataset.map(functools.partial(preprocess, config=config))
    dataset = dataset.prefetch(10)
    return dataset


class Dreamer(tools.Module):

    def __init__(self, config, datadir, actspace, writer):
        self._c = config
        self._actspace = actspace
        self._actdim = actspace.n if hasattr(
            actspace, 'n') else actspace.shape[0]
        self._writer = writer
        self._random = np.random.RandomState(config.seed)
        self._should_pretrain = tools.Once()
        self._should_train = tools.Every(config.train_every)
        self._should_log = tools.Every(config.log_every)
        self._last_log = None
        self._last_time = time.time()
        self._metrics = collections.defaultdict(tf.metrics.Mean)
        self._metrics['expl_amount']  # Create variable for checkpoint.
        self._float = prec.global_policy().compute_dtype
        self._strategy = tf.distribute.MirroredStrategy()
        with tf.device('cpu:0'):
            self._step = tf.Variable(count_steps(
                datadir, config), dtype=tf.int64)
        with self._strategy.scope():
            self._dataset = iter(self._strategy.experimental_distribute_dataset(
                load_dataset(datadir, self._c)))
            self._build_model()

    def __call__(self, obs, reset, state=None, training=True):
        step = self._step.numpy().item()
        tf.summary.experimental.set_step(step)
        if state is not None and reset.any():
            mask = tf.cast(1 - reset, self._float)[:, None]
            state = tf.nest.map_structure(lambda x: x * mask, state)
        if self._should_train(step):
            log = self._should_log(step)
            n = self._c.pretrain if self._should_pretrain() else self._c.train_steps
            print(f'Training for {n} steps.')
            with self._strategy.scope():
                for train_step in range(n):
                    log_images = self._c.log_images and log and train_step == 0
                    self.train(next(self._dataset), log_images)
            if log:
                self._write_summaries()
        action, state = self.policy(obs, state, training)
        if training:
            self._step.assign_add(len(reset) * self._c.action_repeat)
        return action, state

    @tf.function
    def policy(self, obs, state, training):
        if state is None:
            latent = self._dynamics.initial(len(obs['image']))
            action = tf.zeros((len(obs['image']), self._actdim), self._float)
        else:
            latent, action = state
        embed = self._encode(preprocess(obs, self._c))
        latent, _ = self._dynamics.obs_step(latent, action, embed)
        feat = self._dynamics.get_feat(latent)
        if training:
            action = self._actor(feat).sample()
        else:
            action = self._actor(feat).mode()
        action = self._exploration(action, training)
        state = (latent, action)
        return action, state

    def load(self, filename):
        super().load(filename)
        self._should_pretrain()

    @tf.function()
    def train(self, data, log_images=False):
        self._strategy.experimental_run_v2(
            self._train, args=(data, log_images))

    def _train(self, data, log_images):
        with tf.GradientTape() as model_tape:
            embed = self._encode(data)
            post, prior = self._dynamics.observe(embed, data['action'])
            feat = self._dynamics.get_feat(post)

            image_pred = self._decode(feat)
            reward_pred = self._reward(feat)

            likes = tools.AttrDict()
            likes.image = tf.reduce_mean(image_pred.log_prob(data['image']))
            likes.reward = tf.reduce_mean(reward_pred.log_prob(data['reward']))
            if self._c.pcont:
                pcont_pred = self._pcont(feat)
                pcont_target = self._c.discount * data['discount']
                likes.pcont = tf.reduce_mean(pcont_pred.log_prob(pcont_target))
                likes.pcont *= self._c.pcont_scale

            prior_dist = self._dynamics.get_dist(prior)
            post_dist = self._dynamics.get_dist(post)
            div = tf.reduce_mean(tfd.kl_divergence(post_dist, prior_dist))
            div = tf.maximum(div, self._c.free_nats)

            model_loss = self._c.kl_scale * div - sum(likes.values())
            model_loss /= float(self._strategy.num_replicas_in_sync)

        with tf.GradientTape() as actor_tape:
            imag_feat = self._imagine_ahead(post)
            reward = self._reward(imag_feat).mode()
            if self._c.pcont:
                pcont = self._pcont(imag_feat).mean()
            else:
                pcont = self._c.discount * tf.ones_like(reward)
            value = self._value(imag_feat).mode()
            returns = tools.lambda_return(
                reward[:-1], value[:-1], pcont[:-1],
                bootstrap=value[-1], lambda_=self._c.disclam, axis=0)
            discount = tf.stop_gradient(tf.math.cumprod(tf.concat(
                [tf.ones_like(pcont[:1]), pcont[:-2]], 0), 0))
            actor_loss = -tf.reduce_mean(discount * returns)
            actor_loss /= float(self._strategy.num_replicas_in_sync)

        with tf.GradientTape() as value_tape:
            value_pred = self._value(imag_feat)[:-1]
            target = tf.stop_gradient(returns)
            value_loss = - \
                tf.reduce_mean(discount * value_pred.log_prob(target))
            value_loss /= float(self._strategy.num_replicas_in_sync)

        model_norm = self._model_opt(model_tape, model_loss)
        actor_norm = self._actor_opt(actor_tape, actor_loss)
        value_norm = self._value_opt(value_tape, value_loss)

        if tf.distribute.get_replica_context().replica_id_in_sync_group == 0:
            if self._c.log_scalars:
                self._scalar_summaries(
                    data, feat, prior_dist, post_dist, likes, div,
                    model_loss, value_loss, actor_loss, model_norm, value_norm,
                    actor_norm)
            if tf.equal(log_images, True):
                self._image_summaries(data, embed, image_pred)

    def _build_model(self):
        acts = dict(
            elu=tf.nn.elu, relu=tf.nn.relu, swish=tf.nn.swish,
            leaky_relu=tf.nn.leaky_relu)
        cnn_act = acts[self._c.cnn_act]
        act = acts[self._c.dense_act]
        self._encode = models.ConvEncoder(
            self._c.cnn_depth, cnn_act, self._c.image_size)
        self._dynamics = models.RSSM(
            self._c.stoch_size, self._c.deter_size, self._c.deter_size)
        self._decode = models.ConvDecoder(
            self._c.cnn_depth, cnn_act, (self._c.image_size, self._c.image_size, 3))
        self._reward = models.DenseDecoder((), 2, self._c.num_units, act=act)
        if self._c.pcont:
            self._pcont = models.DenseDecoder(
                (), 3, self._c.num_units, 'binary', act=act)
        self._value = models.DenseDecoder((), 3, self._c.num_units, act=act)
        self._actor = models.ActionDecoder(
            self._actdim, 4, self._c.num_units, self._c.action_dist,
            init_std=self._c.action_init_std, act=act)
        model_modules = [self._encode, self._dynamics,
                         self._decode, self._reward]
        if self._c.pcont:
            model_modules.append(self._pcont)
        Optimizer = functools.partial(
            tools.Adam, wd=self._c.weight_decay, clip=self._c.grad_clip,
            wdpattern=self._c.weight_decay_pattern)
        self._model_opt = Optimizer('model', model_modules, self._c.model_lr)
        self._value_opt = Optimizer('value', [self._value], self._c.value_lr)
        self._actor_opt = Optimizer('actor', [self._actor], self._c.actor_lr)
        self.train(next(self._dataset))

    def _exploration(self, action, training):
        if training:
            amount = self._c.expl_amount
            if self._c.expl_decay:
                amount *= 0.5 ** (tf.cast(self._step,
                                          tf.float32) / self._c.expl_decay)
            if self._c.expl_min:
                amount = tf.maximum(self._c.expl_min, amount)
            self._metrics['expl_amount'].update_state(amount)
        elif self._c.eval_noise:
            amount = self._c.eval_noise
        else:
            return action
        if self._c.expl == 'additive_gaussian':
            return tf.clip_by_value(tfd.Normal(action, amount).sample(), -1, 1)
        if self._c.expl == 'completely_random':
            return tf.random.uniform(action.shape, -1, 1)
        if self._c.expl == 'epsilon_greedy':
            indices = tfd.Categorical(0 * action).sample()
            # pylint: disable=unexpected-keyword-arg, no-value-for-parameter
            return tf.where(
                tf.random.uniform(action.shape[:1], 0, 1) < amount,
                tf.one_hot(indices, action.shape[-1], dtype=self._float),
                action)
        raise NotImplementedError(self._c.expl)

    def _imagine_ahead(self, post):
        if self._c.pcont:  # Last step could be terminal.
            post = {k: v[:, :-1] for k, v in post.items()}

        def flatten(x): return tf.reshape(x, [-1] + list(x.shape[2:]))
        start = {k: flatten(v) for k, v in post.items()}

        def policy(state): return self._actor(
            tf.stop_gradient(self._dynamics.get_feat(state))).sample()
        states = tools.static_scan(
            lambda prev, _: self._dynamics.img_step(prev, policy(prev)),
            tf.range(self._c.horizon), start)
        imag_feat = self._dynamics.get_feat(states)
        return imag_feat

    def _scalar_summaries(
            self, data, feat, prior_dist, post_dist, likes, div,
            model_loss, value_loss, actor_loss, model_norm, value_norm,
            actor_norm):
        self._metrics['model_grad_norm'].update_state(model_norm)
        self._metrics['value_grad_norm'].update_state(value_norm)
        self._metrics['actor_grad_norm'].update_state(actor_norm)
        self._metrics['prior_ent'].update_state(prior_dist.entropy())
        self._metrics['post_ent'].update_state(post_dist.entropy())
        for name, logprob in likes.items():
            self._metrics[name + '_loss'].update_state(-logprob)
        self._metrics['div'].update_state(div)
        self._metrics['model_loss'].update_state(model_loss)
        self._metrics['value_loss'].update_state(value_loss)
        self._metrics['actor_loss'].update_state(actor_loss)
        self._metrics['action_ent'].update_state(self._actor(feat).entropy())

    def _image_summaries(self, data, embed, image_pred):
        truth = data['image'][:6] + 0.5
        recon = image_pred.mode()[:6]
        init, _ = self._dynamics.observe(embed[:6, :5], data['action'][:6, :5])
        init = {k: v[:, -1] for k, v in init.items()}
        prior = self._dynamics.imagine(data['action'][:6, 5:], init)
        openl = self._decode(self._dynamics.get_feat(prior)).mode()
        model = tf.concat([recon[:, :5] + 0.5, openl + 0.5], 1)
        error = (model - truth + 1) / 2
        openl = tf.concat([truth, model, error], 2)
        tools.graph_summary(
            self._writer, tools.video_summary, self._step, 'agent/openl', openl)

    def image_summary_from_data(self, data):
        truth = data['image'][:6] + 0.5
        embed = self._encode(data)
        post, _ = self._dynamics.observe(
            embed[:6, :5], data['action'][:6, :5])
        feat = self._dynamics.get_feat(post)
        init = {k: v[:, -1] for k, v in post.items()}
        recon = self._decode(feat).mode()[:6]
        prior = self._dynamics.imagine(data['action'][:6, 5:], init)
        openl = self._decode(self._dynamics.get_feat(prior)).mode()
        model = tf.concat([recon[:, :5] + 0.5, openl + 0.5], 1)
        error = (model - truth + 1) / 2
        openl = tf.concat([truth, model, error], 2)
        tools.graph_summary(
            self._writer, tools.video_summary, self._step, 'agent/eval_openl', openl)

    def _write_summaries(self):
        step = int(self._step.numpy())
        metrics = [(k, float(v.result())) for k, v in self._metrics.items()]
        if self._last_log is not None:
            duration = time.time() - self._last_time
            self._last_time += duration
            metrics.append(('fps', (step - self._last_log) / duration))
        self._last_log = step
        [m.reset_states() for m in self._metrics.values()]
        with (self._c.logdir / 'metrics.jsonl').open('a') as f:
            f.write(json.dumps({'step': step, **dict(metrics)}) + '\n')
        [tf.summary.scalar('agent/' + k, m) for k, m in metrics]
        print(f'[{step}]', ' / '.join(f'{k} {v:.1f}' for k, v in metrics))
        self._writer.flush()


class SeparationDreamer(Dreamer):

    def __init__(self, config, datadir, actspace, writer):
        self._metrics_disen = collections.defaultdict(tf.metrics.Mean)
        self._metrics_disen['expl_amount']
        super().__init__(config, datadir, actspace, writer)

    def _train(self, data, log_images):
        with tf.GradientTape(persistent=True) as model_tape:

            # main
            embed = self._encode(data)
            post, prior = self._dynamics.observe(embed, data['action'])
            feat = self._dynamics.get_feat(post)

            # disen
            embed_disen = self._disen_encode(data)
            post_disen, prior_disen = self._disen_dynamics.observe(
                embed_disen, data['action'])
            feat_disen = self._disen_dynamics.get_feat(post_disen)

            # disen image pred
            image_pred_disen = self._disen_only_decode(feat_disen)

            # joint image pred
            image_pred_joint, image_pred_joint_main, image_pred_joint_disen, mask_pred = self._joint_decode(
                feat, feat_disen)

            # reward pred
            reward_pred = self._reward(feat)

            # optimize disen reward predictor till optimal
            for _ in range(self._c.num_reward_opt_iters):
                with tf.GradientTape() as disen_reward_tape:
                    reward_pred_disen = self._disen_reward(
                        tf.stop_gradient(feat_disen))
                    reward_like_disen = reward_pred_disen.log_prob(
                        data['reward'])
                    reward_loss_disen = -tf.reduce_mean(reward_like_disen)
                    reward_loss_disen /= float(
                        self._strategy.num_replicas_in_sync)
                reward_disen_norm = self._disen_reward_opt(
                    disen_reward_tape, reward_loss_disen)

            # disen reward pred with optimal reward predictor
            reward_pred_disen = self._disen_reward(feat_disen)
            reward_like_disen = tf.reduce_mean(
                reward_pred_disen.log_prob(data['reward']))

            # main model loss
            likes = tools.AttrDict()
            likes.image = tf.reduce_mean(
                image_pred_joint.log_prob(data['image']))
            likes.reward = tf.reduce_mean(reward_pred.log_prob(
                data['reward'])) * self._c.reward_scale
            if self._c.pcont:
                pcont_pred = self._pcont(feat)
                pcont_target = self._c.discount * data['discount']
                likes.pcont = tf.reduce_mean(pcont_pred.log_prob(pcont_target))
                likes.pcont *= self._c.pcont_scale

            prior_dist = self._dynamics.get_dist(prior)
            post_dist = self._dynamics.get_dist(post)
            div = tf.reduce_mean(tfd.kl_divergence(post_dist, prior_dist))
            div = tf.maximum(div, self._c.free_nats)

            model_loss = self._c.kl_scale * div - sum(likes.values())
            model_loss /= float(self._strategy.num_replicas_in_sync)

            # disen model loss with reward negative gradient
            likes_disen = tools.AttrDict()
            likes_disen.image = tf.reduce_mean(
                image_pred_joint.log_prob(data['image']))
            likes_disen.disen_only = tf.reduce_mean(
                image_pred_disen.log_prob(data['image']))

            reward_like_disen = reward_pred_disen.log_prob(data['reward'])
            reward_like_disen = tf.reduce_mean(reward_like_disen)
            reward_loss_disen = -reward_like_disen

            prior_dist_disen = self._disen_dynamics.get_dist(prior_disen)
            post_dist_disen = self._disen_dynamics.get_dist(post_disen)
            div_disen = tf.reduce_mean(tfd.kl_divergence(
                post_dist_disen, prior_dist_disen))
            div_disen = tf.maximum(div_disen, self._c.free_nats)

            model_loss_disen = div_disen * self._c.disen_kl_scale + \
                reward_like_disen * self._c.disen_neg_rew_scale - \
                likes_disen.image - likes_disen.disen_only * self._c.disen_rec_scale
            model_loss_disen /= float(self._strategy.num_replicas_in_sync)

            decode_loss = model_loss_disen + model_loss

        with tf.GradientTape() as actor_tape:
            imag_feat = self._imagine_ahead(post)
            reward = self._reward(imag_feat).mode()
            if self._c.pcont:
                pcont = self._pcont(imag_feat).mean()
            else:
                pcont = self._c.discount * tf.ones_like(reward)
            value = self._value(imag_feat).mode()
            returns = tools.lambda_return(
                reward[:-1], value[:-1], pcont[:-1],
                bootstrap=value[-1], lambda_=self._c.disclam, axis=0)
            discount = tf.stop_gradient(tf.math.cumprod(tf.concat(
                [tf.ones_like(pcont[:1]), pcont[:-2]], 0), 0))
            actor_loss = -tf.reduce_mean(discount * returns)
            actor_loss /= float(self._strategy.num_replicas_in_sync)

        with tf.GradientTape() as value_tape:
            value_pred = self._value(imag_feat)[:-1]
            target = tf.stop_gradient(returns)
            value_loss = - \
                tf.reduce_mean(discount * value_pred.log_prob(target))
            value_loss /= float(self._strategy.num_replicas_in_sync)

        model_norm = self._model_opt(model_tape, model_loss)
        model_disen_norm = self._disen_opt(model_tape, model_loss_disen)
        decode_norm = self._decode_opt(model_tape, decode_loss)
        actor_norm = self._actor_opt(actor_tape, actor_loss)
        value_norm = self._value_opt(value_tape, value_loss)

        if tf.distribute.get_replica_context().replica_id_in_sync_group == 0:
            if self._c.log_scalars:
                self._scalar_summaries(
                    data, feat, prior_dist, post_dist, likes, div,
                    model_loss, value_loss, actor_loss, model_norm, value_norm, actor_norm)
                self._scalar_summaries_disen(
                    prior_dist_disen, post_dist_disen, likes_disen, div_disen,
                    model_loss_disen, reward_loss_disen,
                    model_disen_norm, reward_disen_norm)
            if tf.equal(log_images, True):
                self._image_summaries_joint(
                    data, embed, embed_disen, image_pred_joint, mask_pred)
                self._image_summaries(
                    self._disen_dynamics, self._disen_decode, data, embed_disen, image_pred_joint_disen, tag='disen/openl_joint_disen')
                self._image_summaries(
                    self._disen_dynamics, self._disen_only_decode, data, embed_disen, image_pred_disen, tag='disen_only/openl_disen_only')
                self._image_summaries(
                    self._dynamics, self._main_decode, data, embed, image_pred_joint_main, tag='main/openl_joint_main')

    def _build_model(self):
        acts = dict(
            elu=tf.nn.elu, relu=tf.nn.relu, swish=tf.nn.swish,
            leaky_relu=tf.nn.leaky_relu)
        cnn_act = acts[self._c.cnn_act]
        act = acts[self._c.dense_act]

        # Distractor dynamic model
        self._disen_encode = models.ConvEncoder(
            self._c.disen_cnn_depth, cnn_act, self._c.image_size)
        self._disen_dynamics = models.RSSM(
            self._c.disen_stoch_size, self._c.disen_deter_size, self._c.disen_deter_size)
        self._disen_only_decode = models.ConvDecoder(
            self._c.disen_cnn_depth, cnn_act, (self._c.image_size, self._c.image_size, 3))
        self._disen_reward = models.DenseDecoder(
            (), 2, self._c.num_units, act=act)

        # Task dynamic model
        self._encode = models.ConvEncoder(
            self._c.cnn_depth, cnn_act, self._c.image_size)
        self._dynamics = models.RSSM(
            self._c.stoch_size, self._c.deter_size, self._c.deter_size)
        self._reward = models.DenseDecoder((), 2, self._c.num_units, act=act)
        if self._c.pcont:
            self._pcont = models.DenseDecoder(
                (), 3, self._c.num_units, 'binary', act=act)
        self._value = models.DenseDecoder((), 3, self._c.num_units, act=act)
        self._actor = models.ActionDecoder(
            self._actdim, 4, self._c.num_units, self._c.action_dist,
            init_std=self._c.action_init_std, act=act)

        # Joint decode
        self._main_decode = models.ConvDecoderMask(
            self._c.cnn_depth, cnn_act, (self._c.image_size, self._c.image_size, 3))
        self._disen_decode = models.ConvDecoderMask(
            self._c.disen_cnn_depth, cnn_act, (self._c.image_size, self._c.image_size, 3))
        self._joint_decode = models.ConvDecoderMaskEnsemble(
            self._main_decode, self._disen_decode, self._c.precision
        )

        disen_modules = [self._disen_encode,
                         self._disen_dynamics, self._disen_only_decode]
        model_modules = [self._encode, self._dynamics, self._reward]
        if self._c.pcont:
            model_modules.append(self._pcont)

        Optimizer = functools.partial(
            tools.Adam, wd=self._c.weight_decay, clip=self._c.grad_clip,
            wdpattern=self._c.weight_decay_pattern)
        self._model_opt = Optimizer('model', model_modules, self._c.model_lr)
        self._disen_opt = Optimizer('disen', disen_modules, self._c.model_lr)
        self._decode_opt = Optimizer(
            'decode', [self._joint_decode], self._c.model_lr)
        self._disen_reward_opt = Optimizer(
            'disen_reward', [self._disen_reward], self._c.disen_reward_lr)
        self._value_opt = Optimizer('value', [self._value], self._c.value_lr)
        self._actor_opt = Optimizer('actor', [self._actor], self._c.actor_lr)
        self.train(next(self._dataset))

    def _scalar_summaries_disen(
            self, prior_dist_disen, post_dist_disen, likes_disen, div_disen,
            model_loss_disen, reward_loss_disen,
            model_disen_norm, reward_disen_norm):
        self._metrics_disen['model_grad_norm'].update_state(model_disen_norm)
        self._metrics_disen['reward_grad_norm'].update_state(reward_disen_norm)
        self._metrics_disen['prior_ent'].update_state(
            prior_dist_disen.entropy())
        self._metrics_disen['post_ent'].update_state(post_dist_disen.entropy())
        for name, logprob in likes_disen.items():
            self._metrics_disen[name + '_loss'].update_state(-logprob)
        self._metrics_disen['div'].update_state(div_disen)
        self._metrics_disen['model_loss'].update_state(model_loss_disen)
        self._metrics_disen['reward_loss'].update_state(
            reward_loss_disen)

    def _image_summaries(self, dynamics, decoder, data, embed, image_pred, tag='agent/openl'):
        truth = data['image'][:6] + 0.5
        recon = image_pred.mode()[:6]
        init, _ = dynamics.observe(embed[:6, :5], data['action'][:6, :5])
        init = {k: v[:, -1] for k, v in init.items()}
        prior = dynamics.imagine(data['action'][:6, 5:], init)
        if isinstance(decoder, models.ConvDecoderMask):
            openl, _ = decoder(dynamics.get_feat(prior))
            openl = openl.mode()
        else:
            openl = decoder(dynamics.get_feat(prior)).mode()
        model = tf.concat([recon[:, :5] + 0.5, openl + 0.5], 1)
        error = (model - truth + 1) / 2
        openl = tf.concat([truth, model, error], 2)
        tools.graph_summary(
            self._writer, tools.video_summary, self._step, tag, openl)

    def _image_summaries_joint(self, data, embed, embed_disen, image_pred_joint, mask_pred):
        truth = data['image'][:6] + 0.5
        recon_joint = image_pred_joint.mode()[:6]
        mask_pred = mask_pred[:6]

        init, _ = self._dynamics.observe(
            embed[:6, :5], data['action'][:6, :5])
        init_disen, _ = self._disen_dynamics.observe(
            embed_disen[:6, :5], data['action'][:6, :5])
        init = {k: v[:, -1] for k, v in init.items()}
        init_disen = {k: v[:, -1] for k, v in init_disen.items()}
        prior = self._dynamics.imagine(
            data['action'][:6, 5:], init)
        prior_disen = self._disen_dynamics.imagine(
            data['action'][:6, 5:], init_disen)

        feat = self._dynamics.get_feat(prior)
        feat_disen = self._disen_dynamics.get_feat(prior_disen)
        openl, _, _, openl_mask = self._joint_decode(feat, feat_disen)

        openl = openl.mode()
        model = tf.concat([recon_joint[:, :5] + 0.5, openl + 0.5], 1)
        error = (model - truth + 1) / 2
        openl = tf.concat([truth, model, error], 2)
        openl_mask = tf.concat([mask_pred[:, :5] + 0.5, openl_mask + 0.5], 1)

        tools.graph_summary(
            self._writer, tools.video_summary, self._step, 'joint/openl_joint', openl)
        tools.graph_summary(
            self._writer, tools.video_summary, self._step, 'mask/openl_mask', openl_mask)

    def image_summary_from_data(self, data):
        truth = data['image'][:6] + 0.5

        # main
        embed = self._encode(data)
        post, _ = self._dynamics.observe(
            embed[:6, :5], data['action'][:6, :5])
        feat = self._dynamics.get_feat(post)
        init = {k: v[:, -1] for k, v in post.items()}

        # disen
        embed_disen = self._disen_encode(data)
        post_disen, _ = self._disen_dynamics.observe(
            embed_disen[:6, :5], data['action'][:6, :5])
        feat_disen = self._disen_dynamics.get_feat(post_disen)
        init_disen = {k: v[:, -1] for k, v in post_disen.items()}

        # joint image pred
        recon_joint, recon_main, recon_disen, recon_mask = self._joint_decode(
            feat, feat_disen)
        recon_joint = recon_joint.mode()[:6]
        recon_main = recon_main.mode()[:6]
        recon_disen = recon_disen.mode()[:6]
        recon_mask = recon_mask[:6]

        prior = self._dynamics.imagine(
            data['action'][:6, 5:], init)
        prior_disen = self._disen_dynamics.imagine(
            data['action'][:6, 5:], init_disen)
        feat = self._dynamics.get_feat(prior)
        feat_disen = self._disen_dynamics.get_feat(prior_disen)

        openl_joint, openl_main, openl_disen, openl_mask = self._joint_decode(
            feat, feat_disen)
        openl_joint = openl_joint.mode()
        openl_main = openl_main.mode()
        openl_disen = openl_disen.mode()

        model_joint = tf.concat(
            [recon_joint[:, :5] + 0.5, openl_joint + 0.5], 1)
        error_joint = (model_joint - truth + 1) / 2
        model_main = tf.concat(
            [recon_main[:, :5] + 0.5, openl_main + 0.5], 1)
        model_disen = tf.concat(
            [recon_disen[:, :5] + 0.5, openl_disen + 0.5], 1)
        model_mask = tf.concat(
            [recon_mask[:, :5] + 0.5, openl_mask + 0.5], 1)

        output_joint = tf.concat(
            [truth, model_main, model_disen, model_joint, error_joint], 2)
        output_mask = model_mask

        tools.graph_summary(
            self._writer, tools.video_summary, self._step, 'summary/openl', output_joint)
        tools.graph_summary(
            self._writer, tools.video_summary, self._step, 'summary/openl_mask', output_mask)

    def _write_summaries(self):
        step = int(self._step.numpy())
        metrics = [(k, float(v.result())) for k, v in self._metrics.items()]
        metrics_disen = [(k, float(v.result()))
                         for k, v in self._metrics_disen.items()]
        if self._last_log is not None:
            duration = time.time() - self._last_time
            self._last_time += duration
            metrics.append(('fps', (step - self._last_log) / duration))
        self._last_log = step
        [m.reset_states() for m in self._metrics.values()]
        [m.reset_states() for m in self._metrics_disen.values()]
        with (self._c.logdir / 'metrics.jsonl').open('a') as f:
            f.write(json.dumps({'step': step, **dict(metrics)}) + '\n')
        [tf.summary.scalar('agent/' + k, m) for k, m in metrics]
        [tf.summary.scalar('disen/' + k, m) for k, m in metrics_disen]
        print('#'*30 + ' Main ' + '#'*30)
        print(f'[{step}]', ' / '.join(f'{k} {v:.1f}' for k, v in metrics))
        print('#'*30 + ' Disen ' + '#'*30)
        print(f'[{step}]', ' / '.join(f'{k} {v:.1f}' for k, v in metrics_disen))
        self._writer.flush()


class InverseDreamer(Dreamer):

    def __init__(self, config, datadir, actspace, writer):
        super().__init__(config, datadir, actspace, writer)

    def _train(self, data, log_images):
        with tf.GradientTape() as model_tape:
            embed = self._encode(data)
            post, prior = self._dynamics.observe(embed, data['action'])
            feat = self._dynamics.get_feat(post)

            action_pred = self._decode(feat)
            reward_pred = self._reward(feat)

            likes = tools.AttrDict()
            likes.action = tf.reduce_mean(
                action_pred.log_prob(data['action'][:, :-1]))
            likes.reward = tf.reduce_mean(
                reward_pred.log_prob(data['reward']))
            if self._c.pcont:
                pcont_pred = self._pcont(feat)
                pcont_target = self._c.discount * data['discount']
                likes.pcont = tf.reduce_mean(pcont_pred.log_prob(pcont_target))
                likes.pcont *= self._c.pcont_scale

            prior_dist = self._dynamics.get_dist(prior)
            post_dist = self._dynamics.get_dist(post)
            div = tf.reduce_mean(tfd.kl_divergence(post_dist, prior_dist))
            div = tf.maximum(div, self._c.free_nats)

            model_loss = self._c.kl_scale * div - sum(likes.values())
            model_loss /= float(self._strategy.num_replicas_in_sync)

        with tf.GradientTape() as actor_tape:
            imag_feat = self._imagine_ahead(post)
            reward = self._reward(imag_feat).mode()
            if self._c.pcont:
                pcont = self._pcont(imag_feat).mean()
            else:
                pcont = self._c.discount * tf.ones_like(reward)
            value = self._value(imag_feat).mode()
            returns = tools.lambda_return(
                reward[:-1], value[:-1], pcont[:-1],
                bootstrap=value[-1], lambda_=self._c.disclam, axis=0)
            discount = tf.stop_gradient(tf.math.cumprod(tf.concat(
                [tf.ones_like(pcont[:1]), pcont[:-2]], 0), 0))
            actor_loss = -tf.reduce_mean(discount * returns)
            actor_loss /= float(self._strategy.num_replicas_in_sync)

        with tf.GradientTape() as value_tape:
            value_pred = self._value(imag_feat)[:-1]
            target = tf.stop_gradient(returns)
            value_loss = - \
                tf.reduce_mean(discount * value_pred.log_prob(target))
            value_loss /= float(self._strategy.num_replicas_in_sync)

        model_norm = self._model_opt(model_tape, model_loss)
        actor_norm = self._actor_opt(actor_tape, actor_loss)
        value_norm = self._value_opt(value_tape, value_loss)

        if tf.distribute.get_replica_context().replica_id_in_sync_group == 0:
            if self._c.log_scalars:
                self._scalar_summaries(
                    data, feat, prior_dist, post_dist, likes, div,
                    model_loss, value_loss, actor_loss, model_norm, value_norm,
                    actor_norm)

    def _build_model(self):
        acts = dict(
            elu=tf.nn.elu, relu=tf.nn.relu, swish=tf.nn.swish,
            leaky_relu=tf.nn.leaky_relu)
        cnn_act = acts[self._c.cnn_act]
        act = acts[self._c.dense_act]
        self._encode = models.ConvEncoder(
            self._c.cnn_depth, cnn_act, self._c.image_size)
        self._dynamics = models.RSSM(
            self._c.stoch_size, self._c.deter_size, self._c.deter_size)
        self._decode = models.InverseDecoder(
            self._actdim, 4, self._c.num_units, act=act)
        self._reward = models.DenseDecoder((), 2, self._c.num_units, act=act)
        if self._c.pcont:
            self._pcont = models.DenseDecoder(
                (), 3, self._c.num_units, 'binary', act=act)
        self._value = models.DenseDecoder((), 3, self._c.num_units, act=act)
        self._actor = models.ActionDecoder(
            self._actdim, 4, self._c.num_units, self._c.action_dist,
            init_std=self._c.action_init_std, act=act)
        model_modules = [self._encode, self._dynamics,
                         self._decode, self._reward]
        if self._c.pcont:
            model_modules.append(self._pcont)
        Optimizer = functools.partial(
            tools.Adam, wd=self._c.weight_decay, clip=self._c.grad_clip,
            wdpattern=self._c.weight_decay_pattern)
        self._model_opt = Optimizer('model', model_modules, self._c.model_lr)
        self._value_opt = Optimizer('value', [self._value], self._c.value_lr)
        self._actor_opt = Optimizer('actor', [self._actor], self._c.actor_lr)
        self.train(next(self._dataset))
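For orientation, the `tools.lambda_return` call in each `_train` method above computes Dreamer's $\lambda$-return target over the imagined horizon $H$; a sketch of the recursion, with $\gamma_\tau$ the predicted continuation (`pcont`) and $\lambda$ the `disclam` mixing weight:

$$V_\lambda(\tau) = r_\tau + \gamma_\tau\big[(1 - \lambda)\, v(\tau + 1) + \lambda\, V_\lambda(\tau + 1)\big], \qquad V_\lambda(H) = v(H)$$

The `discount` tensor is the stop-gradient running product of `pcont`, so each imagined step's contribution to the actor and value losses is weighted by the probability that the episode is still running at that step.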
Dreamer/env_tools.py (Normal file, +84)
@@ -0,0 +1,84 @@
import os
import json
import dmc2gym
import tensorflow as tf
from tensorflow.keras.mixed_precision import experimental as prec

import tools
import wrappers


def preprocess(obs, config):
    dtype = prec.global_policy().compute_dtype
    obs = obs.copy()
    with tf.device('cpu:0'):
        obs['image'] = tf.cast(obs['image'], dtype) / 255.0 - 0.5
        clip_rewards = dict(none=lambda x: x, tanh=tf.tanh)[
            config.clip_rewards]
        obs['reward'] = clip_rewards(obs['reward'])
    return obs


def count_steps(datadir, config):
    return tools.count_episodes(datadir)[1] * config.action_repeat


def summarize_episode(episode, config, datadir, writer, prefix):
    episodes, steps = tools.count_episodes(datadir)
    length = (len(episode['reward']) - 1) * config.action_repeat
    ret = episode['reward'].sum()
    print(f'{prefix.title()} episode of length {length} with return {ret:.1f}.')
    metrics = [
        (f'{prefix}/return', float(episode['reward'].sum())),
        (f'{prefix}/length', len(episode['reward']) - 1),
        ('episodes', episodes)]

    step = count_steps(datadir, config)
    with (config.logdir / 'metrics.jsonl').open('a') as f:
        f.write(json.dumps(dict([('step', step)] + metrics)) + '\n')
    with writer.as_default():  # Env might run in a different thread.
        tf.summary.experimental.set_step(step)
        [tf.summary.scalar('sim/' + k, v) for k, v in metrics]
        if prefix == 'test':
            tools.video_summary(f'sim/{prefix}/video', episode['image'][None])


def make_env(config, writer, prefix, datadir, video_dir, store):
    suite, domain_task_distractor = config.task.split('_', 1)
    domain, task_distractor = domain_task_distractor.split('_', 1)
    task, distractor = task_distractor.split('_', 1)

    if distractor == 'driving':
        img_source = 'video'
        total_frames = 1000
        resource_files = os.path.join(video_dir, '*.mp4')
    elif distractor == 'noise':
        img_source = 'noise'
        total_frames = None
        resource_files = None
    elif distractor == 'none':
        img_source = None
        total_frames = None
        resource_files = None
    else:
        raise NotImplementedError

    env = dmc2gym.make(
        domain_name=domain,
        task_name=task,
        resource_files=resource_files,
        img_source=img_source,
        total_frames=total_frames,
        seed=config.seed,
        visualize_reward=False,
        from_pixels=True,
        height=config.image_size,
        width=config.image_size,
        frame_skip=config.action_repeat
    )
    env = wrappers.DMC2GYMWrapper(env)
    env = wrappers.TimeLimit(env, config.time_limit / config.action_repeat)
    callbacks = []
    if store:
        callbacks.append(lambda ep: tools.save_episodes(datadir, [ep]))
    callbacks.append(
        lambda ep: summarize_episode(ep, config, datadir, writer, prefix))
    env = wrappers.Collect(env, callbacks, config.precision)
    env = wrappers.RewardObs(env)
    return env
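The `config.task` string parsed by `make_env` therefore encodes suite, domain, task, and distractor in underscore-separated order; a worked example (the concrete string is hypothetical):

```python
# Illustrative walk-through of the split('_', 1) chain in make_env above.
task = 'dmc_cheetah_run_driving'            # hypothetical config.task value
suite, rest = task.split('_', 1)            # 'dmc', 'cheetah_run_driving'
domain, rest = rest.split('_', 1)           # 'cheetah', 'run_driving'
task_name, distractor = rest.split('_', 1)  # 'run', 'driving' -> video backgrounds
```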
Dreamer/local_dm_control_suite/README.md (Executable file, +56)
@@ -0,0 +1,56 @@
# DeepMind Control Suite.

This submodule contains the domains and tasks described in the
[DeepMind Control Suite tech report](https://arxiv.org/abs/1801.00690).

## Quickstart

```python
from dm_control import suite
import numpy as np

# Load one task:
env = suite.load(domain_name="cartpole", task_name="swingup")

# Iterate over a task set:
for domain_name, task_name in suite.BENCHMARKING:
  env = suite.load(domain_name, task_name)

# Step through an episode and print out reward, discount and observation.
action_spec = env.action_spec()
time_step = env.reset()
while not time_step.last():
  action = np.random.uniform(action_spec.minimum,
                             action_spec.maximum,
                             size=action_spec.shape)
  time_step = env.step(action)
  print(time_step.reward, time_step.discount, time_step.observation)
```

## Illustration video

Below is a video montage of solved Control Suite tasks, with reward
visualisation enabled.

[![Video montage](https://img.youtube.com/vi/rAai4QzcYbs/0.jpg)](https://www.youtube.com/watch?v=rAai4QzcYbs)


### Quadruped domain [April 2019]

Roughly based on the 'ant' model introduced by [Schulman et al. 2015](https://arxiv.org/abs/1506.02438). Main modifications to the body are:

- 4 DoFs per leg, 1 constraining tendon.
- 3 actuators per leg: 'yaw', 'lift', 'extend'.
- Filtered position actuators with timescale of 100ms.
- Sensors include an IMU, force/torque sensors, and rangefinders.

Four tasks:

- `walk` and `run`: self-right the body then move forward at a desired speed.
- `escape`: escape a bowl-shaped random terrain (uses rangefinders).
- `fetch`: go to a moving ball and bring it to a target.

All behaviors in the video below were trained with [Abdolmaleki et al.'s
MPO](https://arxiv.org/abs/1806.06920).

[![Video montage](https://img.youtube.com/vi/RhRLjbb7pBE/0.jpg)](https://www.youtube.com/watch?v=RhRLjbb7pBE)
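A minimal loading sketch for the quadruped tasks listed above (assuming this local copy is importable as `local_dm_control_suite`):

```python
import local_dm_control_suite as suite

# Load the quadruped `walk` task from this local copy of the suite.
env = suite.load(domain_name='quadruped', task_name='walk')
print(env.action_spec())  # 12 actuators are expected: 3 per leg, 4 legs.
```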
151
Dreamer/local_dm_control_suite/__init__.py
Executable file
@ -0,0 +1,151 @@
# Copyright 2017 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""A collection of MuJoCo-based Reinforcement Learning environments."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import inspect
import itertools

from dm_control.rl import control

from local_dm_control_suite import acrobot
from local_dm_control_suite import ball_in_cup
from local_dm_control_suite import cartpole
from local_dm_control_suite import cheetah
from local_dm_control_suite import finger
from local_dm_control_suite import fish
from local_dm_control_suite import hopper
from local_dm_control_suite import humanoid
from local_dm_control_suite import humanoid_CMU
from local_dm_control_suite import lqr
from local_dm_control_suite import manipulator
from local_dm_control_suite import pendulum
from local_dm_control_suite import point_mass
from local_dm_control_suite import quadruped
from local_dm_control_suite import reacher
from local_dm_control_suite import stacker
from local_dm_control_suite import swimmer
from local_dm_control_suite import walker

# Find all domains imported.
_DOMAINS = {name: module for name, module in locals().items()
            if inspect.ismodule(module) and hasattr(module, 'SUITE')}


def _get_tasks(tag):
  """Returns a sequence of (domain name, task name) pairs for the given tag."""
  result = []

  for domain_name in sorted(_DOMAINS.keys()):
    domain = _DOMAINS[domain_name]

    if tag is None:
      tasks_in_domain = domain.SUITE
    else:
      tasks_in_domain = domain.SUITE.tagged(tag)

    for task_name in tasks_in_domain.keys():
      result.append((domain_name, task_name))

  return tuple(result)


def _get_tasks_by_domain(tasks):
  """Returns a dict mapping from domain name to a tuple of task names."""
  result = collections.defaultdict(list)

  for domain_name, task_name in tasks:
    result[domain_name].append(task_name)

  return {k: tuple(v) for k, v in result.items()}


# A sequence containing all (domain name, task name) pairs.
ALL_TASKS = _get_tasks(tag=None)

# Subsets of ALL_TASKS, generated via the tag mechanism.
BENCHMARKING = _get_tasks('benchmarking')
EASY = _get_tasks('easy')
HARD = _get_tasks('hard')
EXTRA = tuple(sorted(set(ALL_TASKS) - set(BENCHMARKING)))

# A mapping from each domain name to a sequence of its task names.
TASKS_BY_DOMAIN = _get_tasks_by_domain(ALL_TASKS)


def load(domain_name, task_name, task_kwargs=None, environment_kwargs=None,
         visualize_reward=False):
  """Returns an environment from a domain name, task name and optional settings.

  ```python
  env = suite.load('cartpole', 'balance')
  ```

  Args:
    domain_name: A string containing the name of a domain.
    task_name: A string containing the name of a task.
    task_kwargs: Optional `dict` of keyword arguments for the task.
    environment_kwargs: Optional `dict` specifying keyword arguments for the
      environment.
    visualize_reward: Optional `bool`. If `True`, object colours in rendered
      frames are set to indicate the reward at each step. Default `False`.

  Returns:
    The requested environment.
  """
  return build_environment(domain_name, task_name, task_kwargs,
                           environment_kwargs, visualize_reward)


def build_environment(domain_name, task_name, task_kwargs=None,
                      environment_kwargs=None, visualize_reward=False):
  """Returns an environment from the suite given a domain name and a task name.

  Args:
    domain_name: A string containing the name of a domain.
    task_name: A string containing the name of a task.
    task_kwargs: Optional `dict` specifying keyword arguments for the task.
    environment_kwargs: Optional `dict` specifying keyword arguments for the
      environment.
    visualize_reward: Optional `bool`. If `True`, object colours in rendered
      frames are set to indicate the reward at each step. Default `False`.

  Raises:
    ValueError: If the domain or task doesn't exist.

  Returns:
    An instance of the requested environment.
  """
  if domain_name not in _DOMAINS:
    raise ValueError('Domain {!r} does not exist.'.format(domain_name))

  domain = _DOMAINS[domain_name]

  if task_name not in domain.SUITE:
    raise ValueError('Level {!r} does not exist in domain {!r}.'.format(
        task_name, domain_name))

  task_kwargs = task_kwargs or {}
  if environment_kwargs is not None:
    task_kwargs = task_kwargs.copy()
    task_kwargs['environment_kwargs'] = environment_kwargs
  env = domain.SUITE[task_name](**task_kwargs)
  env.task.visualize_reward = visualize_reward
  return env
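The tag mechanism above means new tasks become discoverable simply by decorating them with `@SUITE.add(...)` in their domain module; a short usage sketch of the derived collections:

```python
import local_dm_control_suite as suite

# Iterate over the benchmarking subset populated by _get_tasks.
for domain_name, task_name in suite.BENCHMARKING[:3]:
  print(domain_name, task_name)

# Look up every task registered for a single domain.
print(suite.TASKS_BY_DOMAIN['cartpole'])
```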
127
Dreamer/local_dm_control_suite/acrobot.py
Executable file
@ -0,0 +1,127 @@
# Copyright 2017 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Acrobot domain."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections

from dm_control import mujoco
from dm_control.rl import control
from local_dm_control_suite import base
from local_dm_control_suite import common
from dm_control.utils import containers
from dm_control.utils import rewards
import numpy as np

_DEFAULT_TIME_LIMIT = 10
SUITE = containers.TaggedTasks()


def get_model_and_assets():
  """Returns a tuple containing the model XML string and a dict of assets."""
  return common.read_model('acrobot.xml'), common.ASSETS


@SUITE.add('benchmarking')
def swingup(time_limit=_DEFAULT_TIME_LIMIT, random=None,
            environment_kwargs=None):
  """Returns Acrobot balance task."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = Balance(sparse=False, random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, time_limit=time_limit, **environment_kwargs)


@SUITE.add('benchmarking')
def swingup_sparse(time_limit=_DEFAULT_TIME_LIMIT, random=None,
                   environment_kwargs=None):
  """Returns Acrobot sparse balance."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = Balance(sparse=True, random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, time_limit=time_limit, **environment_kwargs)


class Physics(mujoco.Physics):
  """Physics simulation with additional features for the Acrobot domain."""

  def horizontal(self):
    """Returns horizontal (x) component of body frame z-axes."""
    return self.named.data.xmat[['upper_arm', 'lower_arm'], 'xz']

  def vertical(self):
    """Returns vertical (z) component of body frame z-axes."""
    return self.named.data.xmat[['upper_arm', 'lower_arm'], 'zz']

  def to_target(self):
    """Returns the distance from the tip to the target."""
    tip_to_target = (self.named.data.site_xpos['target'] -
                     self.named.data.site_xpos['tip'])
    return np.linalg.norm(tip_to_target)

  def orientations(self):
    """Returns the sines and cosines of the pole angles."""
    return np.concatenate((self.horizontal(), self.vertical()))


class Balance(base.Task):
  """An Acrobot `Task` to swing up and balance the pole."""

  def __init__(self, sparse, random=None):
    """Initializes an instance of `Balance`.

    Args:
      sparse: A `bool` specifying whether to use a sparse (indicator) reward.
      random: Optional, either a `numpy.random.RandomState` instance, an
        integer seed for creating a new `RandomState`, or None to select a seed
        automatically (default).
    """
    self._sparse = sparse
    super(Balance, self).__init__(random=random)

  def initialize_episode(self, physics):
    """Sets the state of the environment at the start of each episode.

    Shoulder and elbow are set to a random position between [-pi, pi).

    Args:
      physics: An instance of `Physics`.
    """
    physics.named.data.qpos[
        ['shoulder', 'elbow']] = self.random.uniform(-np.pi, np.pi, 2)
    super(Balance, self).initialize_episode(physics)

  def get_observation(self, physics):
    """Returns an observation of pole orientation and angular velocities."""
    obs = collections.OrderedDict()
    obs['orientations'] = physics.orientations()
    obs['velocity'] = physics.velocity()
    return obs

  def _get_reward(self, physics, sparse):
    target_radius = physics.named.model.site_size['target', 0]
    return rewards.tolerance(physics.to_target(),
                             bounds=(0, target_radius),
                             margin=0 if sparse else 1)

  def get_reward(self, physics):
    """Returns a sparse or a smooth reward, as specified in the constructor."""
    return self._get_reward(physics, sparse=self._sparse)
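In `_get_reward`, the only difference between the two variants is the `margin`: with `margin=0` the tolerance acts as an indicator that is 1 only while the tip is inside the target radius, while `margin=1` lets the reward decay smoothly with distance. A small sketch of the contrast (the 0.2 target radius matches the target site in `acrobot.xml` below):

```python
from dm_control.utils import rewards

target_radius = 0.2
for distance in (0.1, 0.5, 1.0):
  sparse = rewards.tolerance(distance, bounds=(0, target_radius), margin=0)
  smooth = rewards.tolerance(distance, bounds=(0, target_radius), margin=1)
  print(distance, sparse, smooth)
# sparse: 1.0 inside the radius, 0.0 outside; smooth: decays towards 0
# as the tip moves a full margin (1 unit) beyond the target.
```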
43
Dreamer/local_dm_control_suite/acrobot.xml
Executable file
@ -0,0 +1,43 @@
<!--
Based on Coulom's [1] rather than Spong's [2] model.
 [1] Coulom, Rémi. Reinforcement learning using neural networks, with applications to motor control.
     Diss. Institut National Polytechnique de Grenoble-INPG, 2002.
 [2] Spong, Mark W. "The swing up control problem for the acrobot."
     IEEE control systems 15, no. 1 (1995): 49-55.
-->
<mujoco model="acrobot">
  <include file="./common/visual.xml"/>
  <include file="./common/skybox.xml"/>
  <include file="./common/materials.xml"/>

  <default>
    <joint damping=".05"/>
    <geom type="capsule" mass="1"/>
  </default>

  <option timestep="0.01" integrator="RK4">
    <flag constraint="disable" energy="enable"/>
  </option>

  <worldbody>
    <light name="light" pos="0 0 6"/>
    <geom name="floor" size="3 3 .2" type="plane" material="grid"/>
    <site name="target" type="sphere" pos="0 0 4" size="0.2" material="target" group="3"/>
    <camera name="fixed" pos="0 -6 2" zaxis="0 -1 0"/>
    <camera name="lookat" mode="targetbodycom" target="upper_arm" pos="0 -2 3"/>
    <body name="upper_arm" pos="0 0 2">
      <joint name="shoulder" type="hinge" axis="0 1 0"/>
      <geom name="upper_arm_decoration" material="decoration" type="cylinder" fromto="0 -.06 0 0 .06 0" size="0.051" mass="0"/>
      <geom name="upper_arm" fromto="0 0 0 0 0 1" size="0.05" material="self"/>
      <body name="lower_arm" pos="0 0 1">
        <joint name="elbow" type="hinge" axis="0 1 0"/>
        <geom name="lower_arm" fromto="0 0 0 0 0 1" size="0.049" material="self"/>
        <site name="tip" pos="0 0 1" size="0.01"/>
      </body>
    </body>
  </worldbody>

  <actuator>
    <motor name="elbow" joint="elbow" gear="2" ctrllimited="true" ctrlrange="-1 1"/>
  </actuator>
</mujoco>
100
Dreamer/local_dm_control_suite/ball_in_cup.py
Executable file
@ -0,0 +1,100 @@
# Copyright 2017 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Ball-in-Cup Domain."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections

from dm_control import mujoco
from dm_control.rl import control
from local_dm_control_suite import base
from local_dm_control_suite import common
from dm_control.utils import containers

_DEFAULT_TIME_LIMIT = 20  # (seconds)
_CONTROL_TIMESTEP = .02   # (seconds)


SUITE = containers.TaggedTasks()


def get_model_and_assets():
  """Returns a tuple containing the model XML string and a dict of assets."""
  return common.read_model('ball_in_cup.xml'), common.ASSETS


@SUITE.add('benchmarking', 'easy')
def catch(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
  """Returns the Ball-in-Cup task."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = BallInCup(random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
      **environment_kwargs)


class Physics(mujoco.Physics):
  """Physics with additional features for the Ball-in-Cup domain."""

  def ball_to_target(self):
    """Returns the vector from the ball to the target."""
    target = self.named.data.site_xpos['target', ['x', 'z']]
    ball = self.named.data.xpos['ball', ['x', 'z']]
    return target - ball

  def in_target(self):
    """Returns 1 if the ball is in the target, 0 otherwise."""
    ball_to_target = abs(self.ball_to_target())
    target_size = self.named.model.site_size['target', [0, 2]]
    ball_size = self.named.model.geom_size['ball', 0]
    return float(all(ball_to_target < target_size - ball_size))


class BallInCup(base.Task):
  """The Ball-in-Cup task. Put the ball in the cup."""

  def initialize_episode(self, physics):
    """Sets the state of the environment at the start of each episode.

    Args:
      physics: An instance of `Physics`.
    """
    # Find a collision-free random initial position of the ball.
    penetrating = True
    while penetrating:
      # Assign a random ball position.
      physics.named.data.qpos['ball_x'] = self.random.uniform(-.2, .2)
      physics.named.data.qpos['ball_z'] = self.random.uniform(.2, .5)
      # Check for collisions.
      physics.after_reset()
      penetrating = physics.data.ncon > 0
    super(BallInCup, self).initialize_episode(physics)

  def get_observation(self, physics):
    """Returns an observation of the state."""
    obs = collections.OrderedDict()
    obs['position'] = physics.position()
    obs['velocity'] = physics.velocity()
    return obs

  def get_reward(self, physics):
    """Returns a sparse reward."""
    return physics.in_target()
54
Dreamer/local_dm_control_suite/ball_in_cup.xml
Executable file
@ -0,0 +1,54 @@
<mujoco model="ball in cup">

  <include file="./common/visual.xml"/>
  <include file="./common/skybox.xml"/>
  <include file="./common/materials.xml"/>

  <default>
    <motor ctrllimited="true" ctrlrange="-1 1" gear="5"/>
    <default class="cup">
      <joint type="slide" damping="3" stiffness="20"/>
      <geom type="capsule" size=".008" material="self"/>
    </default>
  </default>

  <worldbody>
    <light name="light" directional="true" diffuse=".6 .6 .6" pos="0 0 2" specular=".3 .3 .3"/>
    <geom name="ground" type="plane" pos="0 0 0" size=".6 .2 10" material="grid"/>
    <camera name="cam0" pos="0 -1 .8" xyaxes="1 0 0 0 1 2"/>
    <camera name="cam1" pos="0 -1 .4" xyaxes="1 0 0 0 0 1" />

    <body name="cup" pos="0 0 .6" childclass="cup">
      <joint name="cup_x" axis="1 0 0"/>
      <joint name="cup_z" axis="0 0 1"/>
      <geom name="cup_part_0" fromto="-.05 0 0 -.05 0 -.075" />
      <geom name="cup_part_1" fromto="-.05 0 -.075 -.025 0 -.1" />
      <geom name="cup_part_2" fromto="-.025 0 -.1 .025 0 -.1" />
      <geom name="cup_part_3" fromto=".025 0 -.1 .05 0 -.075" />
      <geom name="cup_part_4" fromto=".05 0 -.075 .05 0 0" />
      <site name="cup" pos="0 0 -.108" size=".005"/>
      <site name="target" type="box" pos="0 0 -.05" size=".05 .006 .05" group="4"/>
    </body>

    <body name="ball" pos="0 0 .2">
      <joint name="ball_x" type="slide" axis="1 0 0"/>
      <joint name="ball_z" type="slide" axis="0 0 1"/>
      <geom name="ball" type="sphere" size=".025" material="effector"/>
      <site name="ball" size=".005"/>
    </body>
  </worldbody>

  <actuator>
    <motor name="x" joint="cup_x"/>
    <motor name="z" joint="cup_z"/>
  </actuator>

  <tendon>
    <spatial name="string" limited="true" range="0 0.3" width="0.003">
      <site site="ball"/>
      <site site="cup"/>
    </spatial>
  </tendon>

</mujoco>
112
Dreamer/local_dm_control_suite/base.py
Executable file
@ -0,0 +1,112 @@
# Copyright 2017 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Base class for tasks in the Control Suite."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from dm_control import mujoco
from dm_control.rl import control

import numpy as np


class Task(control.Task):
  """Base class for tasks in the Control Suite.

  Actions are mapped directly to the states of MuJoCo actuators: each element of
  the action array is used to set the control input for a single actuator. The
  ordering of the actuators is the same as in the corresponding MJCF XML file.

  Attributes:
    random: A `numpy.random.RandomState` instance. This should be used to
      generate all random variables associated with the task, such as random
      starting states, observation noise* etc.

  *If sensor noise is enabled in the MuJoCo model then this will be generated
  using MuJoCo's internal RNG, which has its own independent state.
  """

  def __init__(self, random=None):
    """Initializes a new continuous control task.

    Args:
      random: Optional, either a `numpy.random.RandomState` instance, an integer
        seed for creating a new `RandomState`, or None to select a seed
        automatically (default).
    """
    if not isinstance(random, np.random.RandomState):
      random = np.random.RandomState(random)
    self._random = random
    self._visualize_reward = False

  @property
  def random(self):
    """Task-specific `numpy.random.RandomState` instance."""
    return self._random

  def action_spec(self, physics):
    """Returns a `BoundedArraySpec` matching the `physics` actuators."""
    return mujoco.action_spec(physics)

  def initialize_episode(self, physics):
    """Resets geom colors to their defaults after starting a new episode.

    Subclasses of `base.Task` must delegate to this method after performing
    their own initialization.

    Args:
      physics: An instance of `mujoco.Physics`.
    """
    self.after_step(physics)

  def before_step(self, action, physics):
    """Sets the control signal for the actuators to values in `action`."""
    # Support legacy internal code.
    action = getattr(action, "continuous_actions", action)
    physics.set_control(action)

  def after_step(self, physics):
    """Modifies colors according to the reward."""
    if self._visualize_reward:
      reward = np.clip(self.get_reward(physics), 0.0, 1.0)
      _set_reward_colors(physics, reward)

  @property
  def visualize_reward(self):
    return self._visualize_reward

  @visualize_reward.setter
  def visualize_reward(self, value):
    if not isinstance(value, bool):
      raise ValueError("Expected a boolean, got {}.".format(type(value)))
    self._visualize_reward = value


_MATERIALS = ["self", "effector", "target"]
_DEFAULT = [name + "_default" for name in _MATERIALS]
_HIGHLIGHT = [name + "_highlight" for name in _MATERIALS]


def _set_reward_colors(physics, reward):
  """Sets the highlight, effector and target colors according to the reward."""
  assert 0.0 <= reward <= 1.0
  colors = physics.named.model.mat_rgba
  default = colors[_DEFAULT]
  highlight = colors[_HIGHLIGHT]
  blend_coef = reward ** 4  # Better color distinction near high rewards.
  colors[_MATERIALS] = blend_coef * highlight + (1.0 - blend_coef) * default
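The quartic blend in `_set_reward_colors` keeps materials near their default colour over most of the reward range and only saturates towards the highlight colour as the reward approaches 1. A small numeric sketch of the interpolation (rgba values taken from `common/materials.xml` below):

```python
import numpy as np

default = np.array([.7, .5, .3, 1.])    # "self_default" rgba
highlight = np.array([0., .5, .3, 1.])  # "self_highlight" rgba

for reward in (0.25, 0.5, 0.9, 1.0):
  blend_coef = reward ** 4
  rgba = blend_coef * highlight + (1.0 - blend_coef) * default
  print(reward, np.round(rgba, 3))
# At reward 0.5 the blend coefficient is only 0.0625, so the colour is
# still ~94% default; the highlight dominates only near reward 1.
```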
230
Dreamer/local_dm_control_suite/cartpole.py
Executable file
@ -0,0 +1,230 @@
# Copyright 2017 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Cartpole domain."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections

from dm_control import mujoco
from dm_control.rl import control
from local_dm_control_suite import base
from local_dm_control_suite import common
from dm_control.utils import containers
from dm_control.utils import rewards
from lxml import etree
import numpy as np
from six.moves import range


_DEFAULT_TIME_LIMIT = 10
SUITE = containers.TaggedTasks()


def get_model_and_assets(num_poles=1):
  """Returns a tuple containing the model XML string and a dict of assets."""
  return _make_model(num_poles), common.ASSETS


@SUITE.add('benchmarking')
def balance(time_limit=_DEFAULT_TIME_LIMIT, random=None,
            environment_kwargs=None):
  """Returns the Cartpole Balance task."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = Balance(swing_up=False, sparse=False, random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, time_limit=time_limit, **environment_kwargs)


@SUITE.add('benchmarking')
def balance_sparse(time_limit=_DEFAULT_TIME_LIMIT, random=None,
                   environment_kwargs=None):
  """Returns the sparse reward variant of the Cartpole Balance task."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = Balance(swing_up=False, sparse=True, random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, time_limit=time_limit, **environment_kwargs)


@SUITE.add('benchmarking')
def swingup(time_limit=_DEFAULT_TIME_LIMIT, random=None,
            environment_kwargs=None):
  """Returns the Cartpole Swing-Up task."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = Balance(swing_up=True, sparse=False, random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, time_limit=time_limit, **environment_kwargs)


@SUITE.add('benchmarking')
def swingup_sparse(time_limit=_DEFAULT_TIME_LIMIT, random=None,
                   environment_kwargs=None):
  """Returns the sparse reward variant of the Cartpole Swing-Up task."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = Balance(swing_up=True, sparse=True, random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, time_limit=time_limit, **environment_kwargs)


@SUITE.add()
def two_poles(time_limit=_DEFAULT_TIME_LIMIT, random=None,
              environment_kwargs=None):
  """Returns the Cartpole Balance task with two poles."""
  physics = Physics.from_xml_string(*get_model_and_assets(num_poles=2))
  task = Balance(swing_up=True, sparse=False, random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, time_limit=time_limit, **environment_kwargs)


@SUITE.add()
def three_poles(time_limit=_DEFAULT_TIME_LIMIT, random=None, num_poles=3,
                sparse=False, environment_kwargs=None):
  """Returns the Cartpole Balance task with three or more poles."""
  physics = Physics.from_xml_string(*get_model_and_assets(num_poles=num_poles))
  task = Balance(swing_up=True, sparse=sparse, random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, time_limit=time_limit, **environment_kwargs)


def _make_model(n_poles):
  """Generates an xml string defining a cart with `n_poles` bodies."""
  xml_string = common.read_model('cartpole.xml')
  if n_poles == 1:
    return xml_string
  mjcf = etree.fromstring(xml_string)
  parent = mjcf.find('./worldbody/body/body')  # Find first pole.
  # Make chain of poles.
  for pole_index in range(2, n_poles+1):
    child = etree.Element('body', name='pole_{}'.format(pole_index),
                          pos='0 0 1', childclass='pole')
    etree.SubElement(child, 'joint', name='hinge_{}'.format(pole_index))
    etree.SubElement(child, 'geom', name='pole_{}'.format(pole_index))
    parent.append(child)
    parent = child
  # Move plane down.
  floor = mjcf.find('./worldbody/geom')
  floor.set('pos', '0 0 {}'.format(1 - n_poles - .05))
  # Move cameras back.
  cameras = mjcf.findall('./worldbody/camera')
  cameras[0].set('pos', '0 {} 1'.format(-1 - 2*n_poles))
  cameras[1].set('pos', '0 {} 2'.format(-2*n_poles))
  return etree.tostring(mjcf, pretty_print=True)


class Physics(mujoco.Physics):
  """Physics simulation with additional features for the Cartpole domain."""

  def cart_position(self):
    """Returns the position of the cart."""
    return self.named.data.qpos['slider'][0]

  def angular_vel(self):
    """Returns the angular velocity of the pole."""
    return self.data.qvel[1:]

  def pole_angle_cosine(self):
    """Returns the cosine of the pole angle."""
    return self.named.data.xmat[2:, 'zz']

  def bounded_position(self):
    """Returns the state, with pole angle split into sin/cos."""
    return np.hstack((self.cart_position(),
                      self.named.data.xmat[2:, ['zz', 'xz']].ravel()))


class Balance(base.Task):
  """A Cartpole `Task` to balance the pole.

  State is initialized either close to the target configuration or at a random
  configuration.
  """
  _CART_RANGE = (-.25, .25)
  _ANGLE_COSINE_RANGE = (.995, 1)

  def __init__(self, swing_up, sparse, random=None):
    """Initializes an instance of `Balance`.

    Args:
      swing_up: A `bool`, which if `True` sets the cart to the middle of the
        slider and the pole pointing towards the ground. Otherwise, sets the
        cart to a random position on the slider and the pole to a random
        near-vertical position.
      sparse: A `bool`, whether to return a sparse or a smooth reward.
      random: Optional, either a `numpy.random.RandomState` instance, an
        integer seed for creating a new `RandomState`, or None to select a seed
        automatically (default).
    """
    self._sparse = sparse
    self._swing_up = swing_up
    super(Balance, self).__init__(random=random)

  def initialize_episode(self, physics):
    """Sets the state of the environment at the start of each episode.

    Initializes the cart and pole according to `swing_up`, and in both cases
    adds a small random initial velocity to break symmetry.

    Args:
      physics: An instance of `Physics`.
    """
    nv = physics.model.nv
    if self._swing_up:
      physics.named.data.qpos['slider'] = .01*self.random.randn()
      physics.named.data.qpos['hinge_1'] = np.pi + .01*self.random.randn()
      physics.named.data.qpos[2:] = .1*self.random.randn(nv - 2)
    else:
      physics.named.data.qpos['slider'] = self.random.uniform(-.1, .1)
      physics.named.data.qpos[1:] = self.random.uniform(-.034, .034, nv - 1)
    physics.named.data.qvel[:] = 0.01 * self.random.randn(physics.model.nv)
    super(Balance, self).initialize_episode(physics)

  def get_observation(self, physics):
    """Returns an observation of the (bounded) physics state."""
    obs = collections.OrderedDict()
    obs['position'] = physics.bounded_position()
    obs['velocity'] = physics.velocity()
    return obs

  def _get_reward(self, physics, sparse):
    if sparse:
      cart_in_bounds = rewards.tolerance(physics.cart_position(),
                                         self._CART_RANGE)
      angle_in_bounds = rewards.tolerance(physics.pole_angle_cosine(),
                                          self._ANGLE_COSINE_RANGE).prod()
      return cart_in_bounds * angle_in_bounds
    else:
      upright = (physics.pole_angle_cosine() + 1) / 2
      centered = rewards.tolerance(physics.cart_position(), margin=2)
      centered = (1 + centered) / 2
      small_control = rewards.tolerance(physics.control(), margin=1,
                                        value_at_margin=0,
                                        sigmoid='quadratic')[0]
      small_control = (4 + small_control) / 5
      small_velocity = rewards.tolerance(physics.angular_vel(), margin=5).min()
      small_velocity = (1 + small_velocity) / 2
      return upright.mean() * small_control * small_velocity * centered

  def get_reward(self, physics):
    """Returns a sparse or a smooth reward, as specified in the constructor."""
    return self._get_reward(physics, sparse=self._sparse)
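`_make_model` above shows the pattern used for parameterized models: parse the base `cartpole.xml` with lxml, append extra pole bodies in a chain (each inheriting the `pole` default class), and re-serialize. A brief sketch of loading the multi-pole variants registered above:

```python
from local_dm_control_suite import cartpole

env = cartpole.two_poles()               # cart with a chain of two poles
env = cartpole.three_poles(num_poles=4)  # the pole count is a free parameter
time_step = env.reset()
print(time_step.observation['position'])  # cart position plus per-pole zz/xz entries
```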
37
Dreamer/local_dm_control_suite/cartpole.xml
Executable file
@ -0,0 +1,37 @@
<mujoco model="cart-pole">
  <include file="./common/skybox.xml"/>
  <include file="./common/visual.xml"/>
  <include file="./common/materials.xml"/>

  <option timestep="0.01" integrator="RK4">
    <flag contact="disable" energy="enable"/>
  </option>

  <default>
    <default class="pole">
      <joint type="hinge" axis="0 1 0" damping="2e-6"/>
      <geom type="capsule" fromto="0 0 0 0 0 1" size="0.045" material="self" mass=".1"/>
    </default>
  </default>

  <worldbody>
    <light name="light" pos="0 0 6"/>
    <camera name="fixed" pos="0 -4 1" zaxis="0 -1 0"/>
    <camera name="lookatcart" mode="targetbody" target="cart" pos="0 -2 2"/>
    <geom name="floor" pos="0 0 -.05" size="4 4 .2" type="plane" material="grid"/>
    <geom name="rail1" type="capsule" pos="0 .07 1" zaxis="1 0 0" size="0.02 2" material="decoration" />
    <geom name="rail2" type="capsule" pos="0 -.07 1" zaxis="1 0 0" size="0.02 2" material="decoration" />
    <body name="cart" pos="0 0 1">
      <joint name="slider" type="slide" limited="true" axis="1 0 0" range="-1.8 1.8" solreflimit=".08 1" damping="5e-4"/>
      <geom name="cart" type="box" size="0.2 0.15 0.1" material="self" mass="1"/>
      <body name="pole_1" childclass="pole">
        <joint name="hinge_1"/>
        <geom name="pole_1"/>
      </body>
    </body>
  </worldbody>

  <actuator>
    <motor name="slide" joint="slider" gear="10" ctrllimited="true" ctrlrange="-1 1" />
  </actuator>
</mujoco>
97
Dreamer/local_dm_control_suite/cheetah.py
Executable file
@ -0,0 +1,97 @@
# Copyright 2017 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Cheetah Domain."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections

from dm_control import mujoco
from dm_control.rl import control
from local_dm_control_suite import base
from local_dm_control_suite import common
from dm_control.utils import containers
from dm_control.utils import rewards


# How long the simulation will run, in seconds.
_DEFAULT_TIME_LIMIT = 10

# Running speed above which reward is 1.
_RUN_SPEED = 10

SUITE = containers.TaggedTasks()


def get_model_and_assets():
  """Returns a tuple containing the model XML string and a dict of assets."""
  return common.read_model('cheetah.xml'), common.ASSETS


@SUITE.add('benchmarking')
def run(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
  """Returns the run task."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = Cheetah(random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(physics, task, time_limit=time_limit,
                             **environment_kwargs)


class Physics(mujoco.Physics):
  """Physics simulation with additional features for the Cheetah domain."""

  def speed(self):
    """Returns the horizontal speed of the Cheetah."""
    return self.named.data.sensordata['torso_subtreelinvel'][0]


class Cheetah(base.Task):
  """A `Task` to train a running Cheetah."""

  def initialize_episode(self, physics):
    """Sets the state of the environment at the start of each episode."""
    # The indexing below assumes that all joints have a single DOF.
    assert physics.model.nq == physics.model.njnt
    is_limited = physics.model.jnt_limited == 1
    lower, upper = physics.model.jnt_range[is_limited].T
    physics.data.qpos[is_limited] = self.random.uniform(lower, upper)

    # Stabilize the model before the actual simulation.
    for _ in range(200):
      physics.step()

    physics.data.time = 0
    self._timeout_progress = 0
    super(Cheetah, self).initialize_episode(physics)

  def get_observation(self, physics):
    """Returns an observation of the state, ignoring horizontal position."""
    obs = collections.OrderedDict()
    # Ignores horizontal position to maintain translational invariance.
    obs['position'] = physics.data.qpos[1:].copy()
    obs['velocity'] = physics.velocity()
    return obs

  def get_reward(self, physics):
    """Returns a reward to the agent."""
    return rewards.tolerance(physics.speed(),
                             bounds=(_RUN_SPEED, float('inf')),
                             margin=_RUN_SPEED,
                             value_at_margin=0,
                             sigmoid='linear')
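Given `bounds=(_RUN_SPEED, inf)`, `margin=_RUN_SPEED`, `value_at_margin=0` and the linear sigmoid, the reward should ramp linearly from 0 at standstill to 1 at `_RUN_SPEED` and stay at 1 for any faster speed. A quick sanity check of that reading, using the same helper:

```python
from dm_control.utils import rewards

for speed in (0.0, 5.0, 10.0, 15.0):
  r = rewards.tolerance(speed, bounds=(10, float('inf')), margin=10,
                        value_at_margin=0, sigmoid='linear')
  print(speed, r)  # expected: 0.0, 0.5, 1.0, 1.0
```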
73
Dreamer/local_dm_control_suite/cheetah.xml
Executable file
@ -0,0 +1,73 @@
<mujoco model="cheetah">
  <include file="./common/skybox.xml"/>
  <include file="./common/visual.xml"/>
  <include file="./common/materials_white_floor.xml"/>

  <compiler settotalmass="14"/>

  <default>
    <default class="cheetah">
      <joint limited="true" damping=".01" armature=".1" stiffness="8" type="hinge" axis="0 1 0"/>
      <geom contype="1" conaffinity="1" condim="3" friction=".4 .1 .1" material="self"/>
    </default>
    <default class="free">
      <joint limited="false" damping="0" armature="0" stiffness="0"/>
    </default>
    <motor ctrllimited="true" ctrlrange="-1 1"/>
  </default>

  <statistic center="0 0 .7" extent="2"/>

  <option timestep="0.01"/>

  <worldbody>
    <geom name="ground" type="plane" conaffinity="1" pos="98 0 0" size="100 .8 .5" material="grid"/>
    <body name="torso" pos="0 0 .7" childclass="cheetah">
      <light name="light" pos="0 0 2" mode="trackcom"/>
      <camera name="side" pos="0 -3 0" quat="0.707 0.707 0 0" mode="trackcom"/>
      <camera name="back" pos="-1.8 -1.3 0.8" xyaxes="0.45 -0.9 0 0.3 0.15 0.94" mode="trackcom"/>
      <joint name="rootx" type="slide" axis="1 0 0" class="free"/>
      <joint name="rootz" type="slide" axis="0 0 1" class="free"/>
      <joint name="rooty" type="hinge" axis="0 1 0" class="free"/>
      <geom name="torso" type="capsule" fromto="-.5 0 0 .5 0 0" size="0.046"/>
      <geom name="head" type="capsule" pos=".6 0 .1" euler="0 50 0" size="0.046 .15"/>
      <body name="bthigh" pos="-.5 0 0">
        <joint name="bthigh" range="-30 60" stiffness="240" damping="6"/>
        <geom name="bthigh" type="capsule" pos=".1 0 -.13" euler="0 -218 0" size="0.046 .145"/>
        <body name="bshin" pos=".16 0 -.25">
          <joint name="bshin" range="-50 50" stiffness="180" damping="4.5"/>
          <geom name="bshin" type="capsule" pos="-.14 0 -.07" euler="0 -116 0" size="0.046 .15"/>
          <body name="bfoot" pos="-.28 0 -.14">
            <joint name="bfoot" range="-230 50" stiffness="120" damping="3"/>
            <geom name="bfoot" type="capsule" pos=".03 0 -.097" euler="0 -15 0" size="0.046 .094"/>
          </body>
        </body>
      </body>
      <body name="fthigh" pos=".5 0 0">
        <joint name="fthigh" range="-57 .40" stiffness="180" damping="4.5"/>
        <geom name="fthigh" type="capsule" pos="-.07 0 -.12" euler="0 30 0" size="0.046 .133"/>
        <body name="fshin" pos="-.14 0 -.24">
          <joint name="fshin" range="-70 50" stiffness="120" damping="3"/>
          <geom name="fshin" type="capsule" pos=".065 0 -.09" euler="0 -34 0" size="0.046 .106"/>
          <body name="ffoot" pos=".13 0 -.18">
            <joint name="ffoot" range="-28 28" stiffness="60" damping="1.5"/>
            <geom name="ffoot" type="capsule" pos=".045 0 -.07" euler="0 -34 0" size="0.046 .07"/>
          </body>
        </body>
      </body>
    </body>
  </worldbody>

  <sensor>
    <subtreelinvel name="torso_subtreelinvel" body="torso"/>
  </sensor>

  <actuator>
    <motor name="bthigh" joint="bthigh" gear="120" />
    <motor name="bshin" joint="bshin" gear="90" />
    <motor name="bfoot" joint="bfoot" gear="60" />
    <motor name="fthigh" joint="fthigh" gear="90" />
    <motor name="fshin" joint="fshin" gear="60" />
    <motor name="ffoot" joint="ffoot" gear="30" />
  </actuator>
</mujoco>
39
Dreamer/local_dm_control_suite/common/__init__.py
Executable file
@ -0,0 +1,39 @@
# Copyright 2017 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Functions to manage the common assets for domains."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
from dm_control.utils import io as resources

_SUITE_DIR = os.path.dirname(os.path.dirname(__file__))
_FILENAMES = [
    "./common/materials.xml",
    "./common/materials_white_floor.xml",
    "./common/skybox.xml",
    "./common/visual.xml",
]

ASSETS = {filename: resources.GetResource(os.path.join(_SUITE_DIR, filename))
          for filename in _FILENAMES}


def read_model(model_filename):
  """Reads a model XML file and returns its contents as a string."""
  return resources.GetResource(os.path.join(_SUITE_DIR, model_filename))
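The `ASSETS` keys are deliberately the same relative paths that appear in the models' `<include file="./common/..."/>` tags, so `Physics.from_xml_string(*get_model_and_assets())` can resolve the includes from memory; a tiny sketch:

```python
from local_dm_control_suite import common

xml_string = common.read_model('acrobot.xml')  # raw MJCF text
print(sorted(common.ASSETS))
# ['./common/materials.xml', './common/materials_white_floor.xml',
#  './common/skybox.xml', './common/visual.xml']
```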
23
Dreamer/local_dm_control_suite/common/materials.xml
Executable file
@ -0,0 +1,23 @@
<!--
Common textures, colors and materials to be used throughout this suite. Some
materials such as xxx_highlight are activated on occurrence of certain events,
for example receiving a positive reward.
-->
<mujoco>
  <asset>
    <texture name="grid" type="2d" builtin="checker" rgb1=".1 .1 .4" rgb2=".2 .2 .8" width="300" height="300" mark="edge" markrgb=".1 .1 .4"/>
    <material name="grid" texture="grid" texrepeat="1 1" texuniform="true" reflectance=".2"/>
    <material name="self" rgba=".7 .5 .3 1"/>
    <material name="self_default" rgba=".7 .5 .3 1"/>
    <material name="self_highlight" rgba="0 .5 .3 1"/>
    <material name="effector" rgba=".7 .4 .2 1"/>
    <material name="effector_default" rgba=".7 .4 .2 1"/>
    <material name="effector_highlight" rgba="0 .5 .3 1"/>
    <material name="decoration" rgba=".7 .5 .3 1"/>
    <material name="eye" rgba="0 .2 1 1"/>
    <material name="target" rgba=".6 .3 .3 1"/>
    <material name="target_default" rgba=".6 .3 .3 1"/>
    <material name="target_highlight" rgba=".6 .3 .3 .4"/>
    <material name="site" rgba=".5 .5 .5 .3"/>
  </asset>
</mujoco>
23
Dreamer/local_dm_control_suite/common/materials_white_floor.xml
Executable file
@ -0,0 +1,23 @@
<!--
Common textures, colors and materials to be used throughout this suite. Some
materials such as xxx_highlight are activated on occurrence of certain events,
for example receiving a positive reward.
-->
<mujoco>
  <asset>
    <texture name="grid" type="2d" builtin="checker" rgb1=".1 .1 .1" rgb2=".2 .2 .2" width="300" height="300" mark="edge" markrgb=".1 .1 .1"/>
    <material name="grid" texture="grid" texrepeat="1 1" texuniform="true" reflectance=".2"/>
    <material name="self" rgba=".7 .5 .3 1"/>
    <material name="self_default" rgba=".7 .5 .3 1"/>
    <material name="self_highlight" rgba="0 .5 .3 1"/>
    <material name="effector" rgba=".7 .4 .2 1"/>
    <material name="effector_default" rgba=".7 .4 .2 1"/>
    <material name="effector_highlight" rgba="0 .5 .3 1"/>
    <material name="decoration" rgba=".3 .5 .7 1"/>
    <material name="eye" rgba="0 .2 1 1"/>
    <material name="target" rgba=".6 .3 .3 1"/>
    <material name="target_default" rgba=".6 .3 .3 1"/>
    <material name="target_highlight" rgba=".6 .3 .3 .4"/>
    <material name="site" rgba=".5 .5 .5 .3"/>
  </asset>
</mujoco>
6
Dreamer/local_dm_control_suite/common/skybox.xml
Executable file
@ -0,0 +1,6 @@
<mujoco>
  <asset>
    <texture name="skybox" type="skybox" builtin="gradient" rgb1=".4 .6 .8" rgb2="0 0 0"
             width="800" height="800" mark="random" markrgb="0 0 0"/>
  </asset>
</mujoco>
7
Dreamer/local_dm_control_suite/common/visual.xml
Executable file
@ -0,0 +1,7 @@
<mujoco>
  <visual>
    <headlight ambient=".4 .4 .4" diffuse=".8 .8 .8" specular="0.1 0.1 0.1"/>
    <map znear=".01"/>
    <quality shadowsize="2048"/>
  </visual>
</mujoco>
84
Dreamer/local_dm_control_suite/demos/mocap_demo.py
Executable file
@ -0,0 +1,84 @@
|
|||||||
|
# Copyright 2017 The dm_control Authors.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
"""Demonstration of amc parsing for CMU mocap database.
|
||||||
|
|
||||||
|
To run the demo, supply a path to a `.amc` file:
|
||||||
|
|
||||||
|
python mocap_demo --filename='path/to/mocap.amc'
|
||||||
|
|
||||||
|
CMU motion capture clips are available at mocap.cs.cmu.edu
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import absolute_import
|
||||||
|
from __future__ import division
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import time
|
||||||
|
# Internal dependencies.
|
||||||
|
|
||||||
|
from absl import app
|
||||||
|
from absl import flags
|
||||||
|
|
||||||
|
from local_dm_control_suite import humanoid_CMU
|
||||||
|
from dm_control.suite.utils import parse_amc
|
||||||
|
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
FLAGS = flags.FLAGS
|
||||||
|
flags.DEFINE_string('filename', None, 'amc file to be converted.')
|
||||||
|
flags.DEFINE_integer('max_num_frames', 90,
|
||||||
|
'Maximum number of frames for plotting/playback')
|
||||||
|
|
||||||
|
|
||||||
|
def main(unused_argv):
|
||||||
|
env = humanoid_CMU.stand()
|
||||||
|
|
||||||
|
# Parse and convert specified clip.
|
||||||
|
converted = parse_amc.convert(FLAGS.filename,
|
||||||
|
env.physics, env.control_timestep())
|
||||||
|
|
||||||
|
max_frame = min(FLAGS.max_num_frames, converted.qpos.shape[1] - 1)
|
||||||
|
|
||||||
|
width = 480
|
||||||
|
height = 480
|
||||||
|
video = np.zeros((max_frame, height, 2 * width, 3), dtype=np.uint8)
|
||||||
|
|
||||||
|
for i in range(max_frame):
|
||||||
|
p_i = converted.qpos[:, i]
|
||||||
|
with env.physics.reset_context():
|
||||||
|
env.physics.data.qpos[:] = p_i
|
||||||
|
video[i] = np.hstack([env.physics.render(height, width, camera_id=0),
|
||||||
|
env.physics.render(height, width, camera_id=1)])
|
||||||
|
|
||||||
|
tic = time.time()
|
||||||
|
for i in range(max_frame):
|
||||||
|
if i == 0:
|
||||||
|
img = plt.imshow(video[i])
|
||||||
|
else:
|
||||||
|
img.set_data(video[i])
|
||||||
|
toc = time.time()
|
||||||
|
clock_dt = toc - tic
|
||||||
|
tic = time.time()
|
||||||
|
# Real-time playback not always possible as clock_dt > .03
|
||||||
|
plt.pause(max(0.01, 0.03 - clock_dt)) # Need min display time > 0.0.
|
||||||
|
plt.draw()
|
||||||
|
plt.waitforbuttonpress()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
flags.mark_flag_as_required('filename')
|
||||||
|
app.run(main)
|
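
The same conversion can also be exercised headlessly, without the matplotlib playback loop. A minimal sketch, assuming a valid `.amc` file is available (the dummy zeros.amc below works):

# Sketch: offscreen rendering of a converted clip (headless variant of the demo above).
import numpy as np
from local_dm_control_suite import humanoid_CMU
from dm_control.suite.utils import parse_amc

env = humanoid_CMU.stand()
converted = parse_amc.convert('zeros.amc', env.physics, env.control_timestep())
frames = []
for i in range(converted.qpos.shape[1] - 1):
  with env.physics.reset_context():
    env.physics.data.qpos[:] = converted.qpos[:, i]
  frames.append(env.physics.render(240, 320, camera_id=0))
print(np.stack(frames).shape)  # (num_frames, 240, 320, 3)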
213
Dreamer/local_dm_control_suite/demos/zeros.amc
Executable file
@ -0,0 +1,213 @@
#DUMMY AMC for testing
:FULLY-SPECIFIED
:DEGREES
1
root 0 0 0 0 0 0
lowerback 0 0 0
upperback 0 0 0
thorax 0 0 0
lowerneck 0 0 0
upperneck 0 0 0
head 0 0 0
rclavicle 0 0
rhumerus 0 0 0
rradius 0
rwrist 0
rhand 0 0
rfingers 0
rthumb 0 0
lclavicle 0 0
lhumerus 0 0 0
lradius 0
lwrist 0
lhand 0 0
lfingers 0
lthumb 0 0
rfemur 0 0 0
rtibia 0
rfoot 0 0
rtoes 0
lfemur 0 0 0
ltibia 0
lfoot 0 0
ltoes 0
2
root 0 0 0 0 0 0
lowerback 0 0 0
upperback 0 0 0
thorax 0 0 0
lowerneck 0 0 0
upperneck 0 0 0
head 0 0 0
rclavicle 0 0
rhumerus 0 0 0
rradius 0
rwrist 0
rhand 0 0
rfingers 0
rthumb 0 0
lclavicle 0 0
lhumerus 0 0 0
lradius 0
lwrist 0
lhand 0 0
lfingers 0
lthumb 0 0
rfemur 0 0 0
rtibia 0
rfoot 0 0
rtoes 0
lfemur 0 0 0
ltibia 0
lfoot 0 0
ltoes 0
3
root 0 0 0 0 0 0
lowerback 0 0 0
upperback 0 0 0
thorax 0 0 0
lowerneck 0 0 0
upperneck 0 0 0
head 0 0 0
rclavicle 0 0
rhumerus 0 0 0
rradius 0
rwrist 0
rhand 0 0
rfingers 0
rthumb 0 0
lclavicle 0 0
lhumerus 0 0 0
lradius 0
lwrist 0
lhand 0 0
lfingers 0
lthumb 0 0
rfemur 0 0 0
rtibia 0
rfoot 0 0
rtoes 0
lfemur 0 0 0
ltibia 0
lfoot 0 0
ltoes 0
4
root 0 0 0 0 0 0
lowerback 0 0 0
upperback 0 0 0
thorax 0 0 0
lowerneck 0 0 0
upperneck 0 0 0
head 0 0 0
rclavicle 0 0
rhumerus 0 0 0
rradius 0
rwrist 0
rhand 0 0
rfingers 0
rthumb 0 0
lclavicle 0 0
lhumerus 0 0 0
lradius 0
lwrist 0
lhand 0 0
lfingers 0
lthumb 0 0
rfemur 0 0 0
rtibia 0
rfoot 0 0
rtoes 0
lfemur 0 0 0
ltibia 0
lfoot 0 0
ltoes 0
5
root 0 0 0 0 0 0
lowerback 0 0 0
upperback 0 0 0
thorax 0 0 0
lowerneck 0 0 0
upperneck 0 0 0
head 0 0 0
rclavicle 0 0
rhumerus 0 0 0
rradius 0
rwrist 0
rhand 0 0
rfingers 0
rthumb 0 0
lclavicle 0 0
lhumerus 0 0 0
lradius 0
lwrist 0
lhand 0 0
lfingers 0
lthumb 0 0
rfemur 0 0 0
rtibia 0
rfoot 0 0
rtoes 0
lfemur 0 0 0
ltibia 0
lfoot 0 0
ltoes 0
6
root 0 0 0 0 0 0
lowerback 0 0 0
upperback 0 0 0
thorax 0 0 0
lowerneck 0 0 0
upperneck 0 0 0
head 0 0 0
rclavicle 0 0
rhumerus 0 0 0
rradius 0
rwrist 0
rhand 0 0
rfingers 0
rthumb 0 0
lclavicle 0 0
lhumerus 0 0 0
lradius 0
lwrist 0
lhand 0 0
lfingers 0
lthumb 0 0
rfemur 0 0 0
rtibia 0
rfoot 0 0
rtoes 0
lfemur 0 0 0
ltibia 0
lfoot 0 0
ltoes 0
7
root 0 0 0 0 0 0
lowerback 0 0 0
upperback 0 0 0
thorax 0 0 0
lowerneck 0 0 0
upperneck 0 0 0
head 0 0 0
rclavicle 0 0
rhumerus 0 0 0
rradius 0
rwrist 0
rhand 0 0
rfingers 0
rthumb 0 0
lclavicle 0 0
lhumerus 0 0 0
lradius 0
lwrist 0
lhand 0 0
lfingers 0
lthumb 0 0
rfemur 0 0 0
rtibia 0
rfoot 0 0
rtoes 0
lfemur 0 0 0
ltibia 0
lfoot 0 0
ltoes 0
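
The AMC payload above is line-oriented: a comment/keyword preamble, then each frame is a bare integer line followed by one `joint value...` line per joint. A hedged one-liner to count frames in such a file:

# Sketch: frame markers in an AMC file are bare integer lines.
with open('zeros.amc') as f:
  print(sum(1 for line in f if line.strip().isdigit()))  # 7 for this dummy file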
84
Dreamer/local_dm_control_suite/explore.py
Executable file
@ -0,0 +1,84 @@
# Copyright 2018 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Control suite environments explorer."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from absl import app
from absl import flags
from dm_control import suite
from dm_control.suite.wrappers import action_noise
from six.moves import input

from dm_control import viewer


_ALL_NAMES = ['.'.join(domain_task) for domain_task in suite.ALL_TASKS]

flags.DEFINE_enum('environment_name', None, _ALL_NAMES,
                  'Optional \'domain_name.task_name\' pair specifying the '
                  'environment to load. If unspecified a prompt will appear to '
                  'select one.')
flags.DEFINE_bool('timeout', True, 'Whether episodes should have a time limit.')
flags.DEFINE_bool('visualize_reward', True,
                  'Whether to vary the colors of geoms according to the '
                  'current reward value.')
flags.DEFINE_float('action_noise', 0.,
                   'Standard deviation of Gaussian noise to apply to actions, '
                   'expressed as a fraction of the max-min range for each '
                   'action dimension. Defaults to 0, i.e. no noise.')
FLAGS = flags.FLAGS


def prompt_environment_name(prompt, values):
  environment_name = None
  while not environment_name:
    environment_name = input(prompt)
    # NB: list.index() raises ValueError on a miss, so use a membership test.
    if not environment_name or environment_name not in values:
      print('"%s" is not a valid environment name.' % environment_name)
      environment_name = None
  return environment_name


def main(argv):
  del argv
  environment_name = FLAGS.environment_name
  if environment_name is None:
    print('\n '.join(['Available environments:'] + _ALL_NAMES))
    environment_name = prompt_environment_name(
        'Please select an environment name: ', _ALL_NAMES)

  index = _ALL_NAMES.index(environment_name)
  domain_name, task_name = suite.ALL_TASKS[index]

  task_kwargs = {}
  if not FLAGS.timeout:
    task_kwargs['time_limit'] = float('inf')

  def loader():
    env = suite.load(
        domain_name=domain_name, task_name=task_name, task_kwargs=task_kwargs)
    env.task.visualize_reward = FLAGS.visualize_reward
    if FLAGS.action_noise > 0:
      env = action_noise.Wrapper(env, scale=FLAGS.action_noise)
    return env

  viewer.launch(loader)


if __name__ == '__main__':
  app.run(main)
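
A typical invocation of the explorer (flag values are illustrative):

python explore.py --environment_name=finger.spin --action_noise=0.1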
217
Dreamer/local_dm_control_suite/finger.py
Executable file
@ -0,0 +1,217 @@
# Copyright 2017 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Finger Domain."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections

from dm_control import mujoco
from dm_control.rl import control
from local_dm_control_suite import base
from local_dm_control_suite import common
from dm_control.suite.utils import randomizers
from dm_control.utils import containers
import numpy as np
from six.moves import range

_DEFAULT_TIME_LIMIT = 20  # (seconds)
_CONTROL_TIMESTEP = .02   # (seconds)
# For TURN tasks, the 'tip' geom needs to enter a spherical target of sizes:
_EASY_TARGET_SIZE = 0.07
_HARD_TARGET_SIZE = 0.03
# Initial spin velocity for the Stop task.
_INITIAL_SPIN_VELOCITY = 100
# Spinning slower than this value (radian/second) is considered stopped.
_STOP_VELOCITY = 1e-6
# Spinning faster than this value (radian/second) is considered spinning.
_SPIN_VELOCITY = 15.0


SUITE = containers.TaggedTasks()


def get_model_and_assets():
  """Returns a tuple containing the model XML string and a dict of assets."""
  return common.read_model('finger.xml'), common.ASSETS


@SUITE.add('benchmarking')
def spin(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
  """Returns the Spin task."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = Spin(random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
      **environment_kwargs)


@SUITE.add('benchmarking')
def turn_easy(time_limit=_DEFAULT_TIME_LIMIT, random=None,
              environment_kwargs=None):
  """Returns the easy Turn task."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = Turn(target_radius=_EASY_TARGET_SIZE, random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
      **environment_kwargs)


@SUITE.add('benchmarking')
def turn_hard(time_limit=_DEFAULT_TIME_LIMIT, random=None,
              environment_kwargs=None):
  """Returns the hard Turn task."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = Turn(target_radius=_HARD_TARGET_SIZE, random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
      **environment_kwargs)


class Physics(mujoco.Physics):
  """Physics simulation with additional features for the Finger domain."""

  def touch(self):
    """Returns logarithmically scaled signals from the two touch sensors."""
    return np.log1p(self.named.data.sensordata[['touchtop', 'touchbottom']])

  def hinge_velocity(self):
    """Returns the velocity of the hinge joint."""
    return self.named.data.sensordata['hinge_velocity']

  def tip_position(self):
    """Returns the (x,z) position of the tip relative to the hinge."""
    return (self.named.data.sensordata['tip'][[0, 2]] -
            self.named.data.sensordata['spinner'][[0, 2]])

  def bounded_position(self):
    """Returns the positions, with the hinge angle replaced by tip position."""
    return np.hstack((self.named.data.sensordata[['proximal', 'distal']],
                      self.tip_position()))

  def velocity(self):
    """Returns the velocities (extracted from sensordata)."""
    return self.named.data.sensordata[['proximal_velocity',
                                       'distal_velocity',
                                       'hinge_velocity']]

  def target_position(self):
    """Returns the (x,z) position of the target relative to the hinge."""
    return (self.named.data.sensordata['target'][[0, 2]] -
            self.named.data.sensordata['spinner'][[0, 2]])

  def to_target(self):
    """Returns the vector from the tip to the target."""
    return self.target_position() - self.tip_position()

  def dist_to_target(self):
    """Returns the signed distance to the target surface, negative is inside."""
    return (np.linalg.norm(self.to_target()) -
            self.named.model.site_size['target', 0])


class Spin(base.Task):
  """A Finger `Task` to spin the stopped body."""

  def __init__(self, random=None):
    """Initializes a new `Spin` instance.

    Args:
      random: Optional, either a `numpy.random.RandomState` instance, an
        integer seed for creating a new `RandomState`, or None to select a seed
        automatically (default).
    """
    super(Spin, self).__init__(random=random)

  def initialize_episode(self, physics):
    physics.named.model.site_rgba['target', 3] = 0
    physics.named.model.site_rgba['tip', 3] = 0
    physics.named.model.dof_damping['hinge'] = .03
    _set_random_joint_angles(physics, self.random)
    super(Spin, self).initialize_episode(physics)

  def get_observation(self, physics):
    """Returns state and touch sensors, and target info."""
    obs = collections.OrderedDict()
    obs['position'] = physics.bounded_position()
    obs['velocity'] = physics.velocity()
    obs['touch'] = physics.touch()
    return obs

  def get_reward(self, physics):
    """Returns a sparse reward."""
    return float(physics.hinge_velocity() <= -_SPIN_VELOCITY)


class Turn(base.Task):
  """A Finger `Task` to turn the body to a target angle."""

  def __init__(self, target_radius, random=None):
    """Initializes a new `Turn` instance.

    Args:
      target_radius: Radius of the target site, which specifies the goal angle.
      random: Optional, either a `numpy.random.RandomState` instance, an
        integer seed for creating a new `RandomState`, or None to select a seed
        automatically (default).
    """
    self._target_radius = target_radius
    super(Turn, self).__init__(random=random)

  def initialize_episode(self, physics):
    target_angle = self.random.uniform(-np.pi, np.pi)
    hinge_x, hinge_z = physics.named.data.xanchor['hinge', ['x', 'z']]
    radius = physics.named.model.geom_size['cap1'].sum()
    target_x = hinge_x + radius * np.sin(target_angle)
    target_z = hinge_z + radius * np.cos(target_angle)
    physics.named.model.site_pos['target', ['x', 'z']] = target_x, target_z
    physics.named.model.site_size['target', 0] = self._target_radius

    _set_random_joint_angles(physics, self.random)

    super(Turn, self).initialize_episode(physics)

  def get_observation(self, physics):
    """Returns state, touch sensors, and target info."""
    obs = collections.OrderedDict()
    obs['position'] = physics.bounded_position()
    obs['velocity'] = physics.velocity()
    obs['touch'] = physics.touch()
    obs['target_position'] = physics.target_position()
    obs['dist_to_target'] = physics.dist_to_target()
    return obs

  def get_reward(self, physics):
    return float(physics.dist_to_target() <= 0)


def _set_random_joint_angles(physics, random, max_attempts=1000):
  """Sets the joints to a random collision-free state."""

  for _ in range(max_attempts):
    randomizers.randomize_limited_and_rotational_joints(physics, random)
    # Check for collisions.
    physics.after_reset()
    if physics.data.ncon == 0:
      break
  else:
    raise RuntimeError('Could not find a collision-free state '
                       'after {} attempts'.format(max_attempts))
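
For orientation, a minimal sketch of driving one of these tasks through the dm_env-style interface exposed by control.Environment; the random-action policy is illustrative only:

# Sketch: random-policy rollout on the Spin task.
import numpy as np
from local_dm_control_suite import finger

env = finger.spin()
spec = env.action_spec()
time_step = env.reset()
while not time_step.last():
  action = np.random.uniform(spec.minimum, spec.maximum, size=spec.shape)
  time_step = env.step(action)  # reward is 1.0 while the spinner exceeds _SPIN_VELOCITY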
72
Dreamer/local_dm_control_suite/finger.xml
Executable file
@ -0,0 +1,72 @@
<mujoco model="finger">
  <include file="./common/visual.xml"/>
  <include file="./common/skybox.xml"/>
  <include file="./common/materials.xml"/>

  <option timestep="0.01" cone="elliptic" iterations="200">
    <flag gravity="disable"/>
  </option>

  <default>
    <geom solimp="0 0.9 0.01" solref=".02 1"/>
    <joint type="hinge" axis="0 -1 0"/>
    <motor ctrllimited="true" ctrlrange="-1 1"/>
    <default class="finger">
      <joint damping="2.5" limited="true"/>
      <site type="ellipsoid" size=".025 .03 .025" material="site" group="3"/>
    </default>
  </default>

  <worldbody>
    <light name="light" directional="true" diffuse=".6 .6 .6" pos="0 0 2" specular=".3 .3 .3"/>
    <geom name="ground" type="plane" pos="0 0 0" size=".6 .2 10" material="grid"/>
    <camera name="cam0" pos="0 -1 .8" xyaxes="1 0 0 0 1 2"/>
    <camera name="cam1" pos="0 -1 .4" xyaxes="1 0 0 0 0 1"/>

    <body name="proximal" pos="-.2 0 .4" childclass="finger">
      <geom name="proximal_decoration" type="cylinder" fromto="0 -.033 0 0 .033 0" size=".034" material="decoration"/>
      <joint name="proximal" range="-110 110" ref="-90"/>
      <geom name="proximal" type="capsule" material="self" size=".03" fromto="0 0 0 0 0 -.17"/>
      <body name="distal" pos="0 0 -.18" childclass="finger">
        <joint name="distal" range="-110 110"/>
        <geom name="distal" type="capsule" size=".028" material="self" fromto="0 0 0 0 0 -.16" contype="0" conaffinity="0"/>
        <geom name="fingertip" type="capsule" size=".03" material="effector" fromto="0 0 -.13 0 0 -.161"/>
        <site name="touchtop" pos=".01 0 -.17"/>
        <site name="touchbottom" pos="-.01 0 -.17"/>
      </body>
    </body>

    <body name="spinner" pos=".2 0 .4">
      <joint name="hinge" frictionloss=".1" damping=".5"/>
      <geom name="cap1" type="capsule" size=".04 .09" material="self" pos=".02 0 0"/>
      <geom name="cap2" type="capsule" size=".04 .09" material="self" pos="-.02 0 0"/>
      <site name="tip" type="sphere" size=".02" pos="0 0 .13" material="target"/>
      <geom name="spinner_decoration" type="cylinder" fromto="0 -.045 0 0 .045 0" size=".02" material="decoration"/>
    </body>

    <site name="target" type="sphere" size=".03" pos="0 0 .4" material="target"/>
  </worldbody>

  <actuator>
    <motor name="proximal" joint="proximal" gear="30"/>
    <motor name="distal" joint="distal" gear="15"/>
  </actuator>

  <!-- All finger observations are functions of sensors. This is useful for finite-differencing. -->
  <sensor>
    <jointpos name="proximal" joint="proximal"/>
    <jointpos name="distal" joint="distal"/>
    <jointvel name="proximal_velocity" joint="proximal"/>
    <jointvel name="distal_velocity" joint="distal"/>
    <jointvel name="hinge_velocity" joint="hinge"/>
    <framepos name="tip" objtype="site" objname="tip"/>
    <framepos name="target" objtype="site" objname="target"/>
    <framepos name="spinner" objtype="xbody" objname="spinner"/>
    <touch name="touchtop" site="touchtop"/>
    <touch name="touchbottom" site="touchbottom"/>
    <framepos name="touchtop_pos" objtype="site" objname="touchtop"/>
    <framepos name="touchbottom_pos" objtype="site" objname="touchbottom"/>
  </sensor>

</mujoco>
176
Dreamer/local_dm_control_suite/fish.py
Executable file
@ -0,0 +1,176 @@
# Copyright 2017 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Fish Domain."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections

from dm_control import mujoco
from dm_control.rl import control
from local_dm_control_suite import base
from local_dm_control_suite import common
from dm_control.utils import containers
from dm_control.utils import rewards
import numpy as np


_DEFAULT_TIME_LIMIT = 40
_CONTROL_TIMESTEP = .04
_JOINTS = ['tail1',
           'tail_twist',
           'tail2',
           'finright_roll',
           'finright_pitch',
           'finleft_roll',
           'finleft_pitch']
SUITE = containers.TaggedTasks()


def get_model_and_assets():
  """Returns a tuple containing the model XML string and a dict of assets."""
  return common.read_model('fish.xml'), common.ASSETS


@SUITE.add('benchmarking')
def upright(time_limit=_DEFAULT_TIME_LIMIT, random=None,
            environment_kwargs=None):
  """Returns the Fish Upright task."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = Upright(random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit,
      **environment_kwargs)


@SUITE.add('benchmarking')
def swim(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
  """Returns the Fish Swim task."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = Swim(random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit,
      **environment_kwargs)


class Physics(mujoco.Physics):
  """Physics simulation with additional features for the Fish domain."""

  def upright(self):
    """Returns projection from z-axes of torso to the z-axes of worldbody."""
    return self.named.data.xmat['torso', 'zz']

  def torso_velocity(self):
    """Returns velocities and angular velocities of the torso."""
    return self.data.sensordata

  def joint_velocities(self):
    """Returns the joint velocities."""
    return self.named.data.qvel[_JOINTS]

  def joint_angles(self):
    """Returns the joint positions."""
    return self.named.data.qpos[_JOINTS]

  def mouth_to_target(self):
    """Returns a vector, from mouth to target in local coordinate of mouth."""
    data = self.named.data
    mouth_to_target_global = data.geom_xpos['target'] - data.geom_xpos['mouth']
    return mouth_to_target_global.dot(data.geom_xmat['mouth'].reshape(3, 3))


class Upright(base.Task):
  """A Fish `Task` for getting the torso upright with smooth reward."""

  def __init__(self, random=None):
    """Initializes an instance of `Upright`.

    Args:
      random: Either an existing `numpy.random.RandomState` instance, an
        integer seed for creating a new `RandomState`, or None to select a seed
        automatically.
    """
    super(Upright, self).__init__(random=random)

  def initialize_episode(self, physics):
    """Randomizes the tail and fin angles and the orientation of the Fish."""
    quat = self.random.randn(4)
    physics.named.data.qpos['root'][3:7] = quat / np.linalg.norm(quat)
    for joint in _JOINTS:
      physics.named.data.qpos[joint] = self.random.uniform(-.2, .2)
    # Hide the target. It's irrelevant for this task.
    physics.named.model.geom_rgba['target', 3] = 0
    super(Upright, self).initialize_episode(physics)

  def get_observation(self, physics):
    """Returns an observation of joint angles, velocities and uprightness."""
    obs = collections.OrderedDict()
    obs['joint_angles'] = physics.joint_angles()
    obs['upright'] = physics.upright()
    obs['velocity'] = physics.velocity()
    return obs

  def get_reward(self, physics):
    """Returns a smooth reward."""
    return rewards.tolerance(physics.upright(), bounds=(1, 1), margin=1)


class Swim(base.Task):
  """A Fish `Task` for swimming with smooth reward."""

  def __init__(self, random=None):
    """Initializes an instance of `Swim`.

    Args:
      random: Optional, either a `numpy.random.RandomState` instance, an
        integer seed for creating a new `RandomState`, or None to select a seed
        automatically (default).
    """
    super(Swim, self).__init__(random=random)

  def initialize_episode(self, physics):
    """Sets the state of the environment at the start of each episode."""

    quat = self.random.randn(4)
    physics.named.data.qpos['root'][3:7] = quat / np.linalg.norm(quat)
    for joint in _JOINTS:
      physics.named.data.qpos[joint] = self.random.uniform(-.2, .2)
    # Randomize target position.
    physics.named.model.geom_pos['target', 'x'] = self.random.uniform(-.4, .4)
    physics.named.model.geom_pos['target', 'y'] = self.random.uniform(-.4, .4)
    physics.named.model.geom_pos['target', 'z'] = self.random.uniform(.1, .3)
    super(Swim, self).initialize_episode(physics)

  def get_observation(self, physics):
    """Returns an observation of joints, target direction and velocities."""
    obs = collections.OrderedDict()
    obs['joint_angles'] = physics.joint_angles()
    obs['upright'] = physics.upright()
    obs['target'] = physics.mouth_to_target()
    obs['velocity'] = physics.velocity()
    return obs

  def get_reward(self, physics):
    """Returns a smooth reward."""
    radii = physics.named.model.geom_size[['mouth', 'target'], 0].sum()
    in_target = rewards.tolerance(np.linalg.norm(physics.mouth_to_target()),
                                  bounds=(0, radii), margin=2*radii)
    is_upright = 0.5 * (physics.upright() + 1)
    return (7*in_target + is_upright) / 8
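
The observation dicts built in get_observation above surface directly in the returned time steps; a quick sketch (assuming local_dm_control_suite is importable):

# Sketch: inspect the Swim task's observation keys.
from local_dm_control_suite import fish

env = fish.swim()
time_step = env.reset()
print(list(time_step.observation))
# Expected: ['joint_angles', 'upright', 'target', 'velocity']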
85
Dreamer/local_dm_control_suite/fish.xml
Executable file
@ -0,0 +1,85 @@
<mujoco model="fish">
  <include file="./common/visual.xml"/>
  <include file="./common/materials.xml"/>
  <asset>
    <texture name="skybox" type="skybox" builtin="gradient" rgb1=".4 .6 .8" rgb2="0 0 0" width="800" height="800" mark="random" markrgb="1 1 1"/>
  </asset>

  <option timestep="0.004" density="5000">
    <flag gravity="disable" constraint="disable"/>
  </option>

  <default>
    <general ctrllimited="true"/>
    <default class="fish">
      <joint type="hinge" limited="false" range="-60 60" damping="2e-5" solreflimit=".1 1" solimplimit="0 .8 .1"/>
      <geom material="self"/>
    </default>
  </default>

  <worldbody>
    <camera name="tracking_top" pos="0 0 1" xyaxes="1 0 0 0 1 0" mode="trackcom"/>
    <camera name="tracking_x" pos="-.3 0 .2" xyaxes="0 -1 0 0.342 0 0.940" fovy="60" mode="trackcom"/>
    <camera name="tracking_y" pos="0 -.3 .2" xyaxes="1 0 0 0 0.342 0.940" fovy="60" mode="trackcom"/>
    <camera name="fixed_top" pos="0 0 5.5" fovy="10"/>
    <geom name="ground" type="plane" size=".5 .5 .1" material="grid"/>
    <geom name="target" type="sphere" pos="0 .4 .1" size=".04" material="target"/>
    <body name="torso" pos="0 0 .1" childclass="fish">
      <light name="light" diffuse=".6 .6 .6" pos="0 0 0.5" dir="0 0 -1" specular=".3 .3 .3" mode="track"/>
      <joint name="root" type="free" damping="0" limited="false"/>
      <site name="torso" size=".01" rgba="0 0 0 0"/>
      <geom name="eye" type="ellipsoid" pos="0 .055 .015" size=".008 .012 .008" euler="-10 0 0" material="eye" mass="0"/>
      <camera name="eye" pos="0 .06 .02" xyaxes="1 0 0 0 0 1"/>
      <geom name="mouth" type="capsule" fromto="0 .079 0 0 .07 0" size=".005" material="effector" mass="0"/>
      <geom name="lower_mouth" type="capsule" fromto="0 .079 -.004 0 .07 -.003" size=".0045" material="effector" mass="0"/>
      <geom name="torso" type="ellipsoid" size=".01 .08 .04" mass="0"/>
      <geom name="back_fin" type="ellipsoid" size=".001 .03 .015" pos="0 -.03 .03" material="effector" mass="0"/>
      <geom name="torso_massive" type="box" size=".002 .06 .03" group="4"/>
      <body name="tail1" pos="0 -.09 0">
        <joint name="tail1" axis="0 0 1" pos="0 .01 0"/>
        <joint name="tail_twist" axis="0 1 0" pos="0 .01 0" range="-30 30"/>
        <geom name="tail1" type="ellipsoid" size=".001 .008 .016"/>
        <body name="tail2" pos="0 -.028 0">
          <joint name="tail2" axis="0 0 1" pos="0 .02 0" stiffness="8e-5"/>
          <geom name="tail2" type="ellipsoid" size=".001 .018 .035"/>
        </body>
      </body>
      <body name="finright" pos=".01 0 0">
        <joint name="finright_roll" axis="0 1 0"/>
        <joint name="finright_pitch" axis="1 0 0" pos="0 .005 0"/>
        <geom name="finright" type="ellipsoid" pos=".015 0 0" size=".02 .015 .001"/>
      </body>
      <body name="finleft" pos="-.01 0 0">
        <joint name="finleft_roll" axis="0 1 0"/>
        <joint name="finleft_pitch" axis="1 0 0" pos="0 .005 0"/>
        <geom name="finleft" type="ellipsoid" pos="-.015 0 0" size=".02 .015 .001"/>
      </body>
    </body>
  </worldbody>

  <tendon>
    <fixed name="fins_flap">
      <joint joint="finleft_roll" coef="-.5"/>
      <joint joint="finright_roll" coef=".5"/>
    </fixed>
    <fixed name="fins_sym" stiffness="1e-4">
      <joint joint="finleft_roll" coef=".5"/>
      <joint joint="finright_roll" coef=".5"/>
    </fixed>
  </tendon>

  <actuator>
    <position name="tail" joint="tail1" ctrlrange="-1 1" kp="5e-4"/>
    <position name="tail_twist" joint="tail_twist" ctrlrange="-1 1" kp="1e-4"/>
    <position name="fins_flap" tendon="fins_flap" ctrlrange="-1 1" kp="3e-4"/>
    <position name="finleft_pitch" joint="finleft_pitch" ctrlrange="-1 1" kp="1e-4"/>
    <position name="finright_pitch" joint="finright_pitch" ctrlrange="-1 1" kp="1e-4"/>
  </actuator>

  <sensor>
    <velocimeter name="velocimeter" site="torso"/>
    <gyro name="gyro" site="torso"/>
  </sensor>
</mujoco>
138
Dreamer/local_dm_control_suite/hopper.py
Executable file
@ -0,0 +1,138 @@
# Copyright 2017 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Hopper domain."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections

from dm_control import mujoco
from dm_control.rl import control
from local_dm_control_suite import base
from local_dm_control_suite import common
from dm_control.suite.utils import randomizers
from dm_control.utils import containers
from dm_control.utils import rewards
import numpy as np


SUITE = containers.TaggedTasks()

_CONTROL_TIMESTEP = .02  # (Seconds)

# Default duration of an episode, in seconds.
_DEFAULT_TIME_LIMIT = 20

# Minimal height of torso over foot above which stand reward is 1.
_STAND_HEIGHT = 0.6

# Hopping speed above which hop reward is 1.
_HOP_SPEED = 2


def get_model_and_assets():
  """Returns a tuple containing the model XML string and a dict of assets."""
  return common.read_model('hopper.xml'), common.ASSETS


@SUITE.add('benchmarking')
def stand(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
  """Returns a Hopper that strives to stand upright, balancing its pose."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = Hopper(hopping=False, random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
      **environment_kwargs)


@SUITE.add('benchmarking')
def hop(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
  """Returns a Hopper that strives to hop forward."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = Hopper(hopping=True, random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
      **environment_kwargs)


class Physics(mujoco.Physics):
  """Physics simulation with additional features for the Hopper domain."""

  def height(self):
    """Returns height of torso with respect to foot."""
    return (self.named.data.xipos['torso', 'z'] -
            self.named.data.xipos['foot', 'z'])

  def speed(self):
    """Returns horizontal speed of the Hopper."""
    return self.named.data.sensordata['torso_subtreelinvel'][0]

  def touch(self):
    """Returns the signals from two foot touch sensors."""
    return np.log1p(self.named.data.sensordata[['touch_toe', 'touch_heel']])


class Hopper(base.Task):
  """A Hopper's `Task` to train a standing and a jumping Hopper."""

  def __init__(self, hopping, random=None):
    """Initialize an instance of `Hopper`.

    Args:
      hopping: Boolean, if True the task is to hop forwards, otherwise it is to
        balance upright.
      random: Optional, either a `numpy.random.RandomState` instance, an
        integer seed for creating a new `RandomState`, or None to select a seed
        automatically (default).
    """
    self._hopping = hopping
    super(Hopper, self).__init__(random=random)

  def initialize_episode(self, physics):
    """Sets the state of the environment at the start of each episode."""
    randomizers.randomize_limited_and_rotational_joints(physics, self.random)
    self._timeout_progress = 0
    super(Hopper, self).initialize_episode(physics)

  def get_observation(self, physics):
    """Returns an observation of positions, velocities and touch sensors."""
    obs = collections.OrderedDict()
    # Ignores horizontal position to maintain translational invariance:
    obs['position'] = physics.data.qpos[1:].copy()
    obs['velocity'] = physics.velocity()
    obs['touch'] = physics.touch()
    return obs

  def get_reward(self, physics):
    """Returns a reward applicable to the performed task."""
    standing = rewards.tolerance(physics.height(), (_STAND_HEIGHT, 2))
    if self._hopping:
      hopping = rewards.tolerance(physics.speed(),
                                  bounds=(_HOP_SPEED, float('inf')),
                                  margin=_HOP_SPEED/2,
                                  value_at_margin=0.5,
                                  sigmoid='linear')
      return standing * hopping
    else:
      small_control = rewards.tolerance(physics.control(),
                                        margin=1, value_at_margin=0,
                                        sigmoid='quadratic').mean()
      small_control = (small_control + 4) / 5
      return standing * small_control
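
The hop reward above leans on rewards.tolerance: 1 inside the bounds, decaying over the margin outside. A small sketch with the same arguments (values shown assume dm_control's standard tolerance semantics):

# Sketch: rewards.tolerance as used by the hop task (_HOP_SPEED = 2).
from dm_control.utils import rewards

# Inside the bound: full reward.
print(rewards.tolerance(2.5, bounds=(2, float('inf')),
                        margin=1.0, value_at_margin=0.5, sigmoid='linear'))  # 1.0
# One full margin below the bound: exactly value_at_margin.
print(rewards.tolerance(1.0, bounds=(2, float('inf')),
                        margin=1.0, value_at_margin=0.5, sigmoid='linear'))  # 0.5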
66
Dreamer/local_dm_control_suite/hopper.xml
Executable file
@ -0,0 +1,66 @@
<mujoco model="planar hopper">
  <include file="./common/skybox.xml"/>
  <include file="./common/visual.xml"/>
  <include file="./common/materials_white_floor.xml"/>

  <statistic extent="2" center="0 0 .5"/>

  <default>
    <default class="hopper">
      <joint type="hinge" axis="0 1 0" limited="true" damping=".05" armature=".2"/>
      <geom type="capsule" material="self"/>
      <site type="sphere" size="0.05" group="3"/>
    </default>
    <default class="free">
      <joint limited="false" damping="0" armature="0" stiffness="0"/>
    </default>
    <motor ctrlrange="-1 1" ctrllimited="true"/>
  </default>

  <option timestep="0.005"/>

  <worldbody>
    <camera name="cam0" pos="0 -2.8 0.8" euler="90 0 0" mode="trackcom"/>
    <camera name="back" pos="-2 -.2 1.2" xyaxes="0.2 -1 0 .5 0 2" mode="trackcom"/>
    <geom name="floor" type="plane" conaffinity="1" pos="48 0 0" size="50 1 .2" material="grid"/>
    <body name="torso" pos="0 0 1" childclass="hopper">
      <light name="top" pos="0 0 2" mode="trackcom"/>
      <joint name="rootx" type="slide" axis="1 0 0" class="free"/>
      <joint name="rootz" type="slide" axis="0 0 1" class="free"/>
      <joint name="rooty" type="hinge" axis="0 1 0" class="free"/>
      <geom name="torso" fromto="0 0 -.05 0 0 .2" size="0.0653"/>
      <geom name="nose" fromto=".08 0 .13 .15 0 .14" size="0.03"/>
      <body name="pelvis" pos="0 0 -.05">
        <joint name="waist" range="-30 30"/>
        <geom name="pelvis" fromto="0 0 0 0 0 -.15" size="0.065"/>
        <body name="thigh" pos="0 0 -.2">
          <joint name="hip" range="-170 10"/>
          <geom name="thigh" fromto="0 0 0 0 0 -.33" size="0.04"/>
          <body name="calf" pos="0 0 -.33">
            <joint name="knee" range="5 150"/>
            <geom name="calf" fromto="0 0 0 0 0 -.32" size="0.03"/>
            <body name="foot" pos="0 0 -.32">
              <joint name="ankle" range="-45 45"/>
              <geom name="foot" fromto="-.08 0 0 .17 0 0" size="0.04"/>
              <site name="touch_toe" pos=".17 0 0"/>
              <site name="touch_heel" pos="-.08 0 0"/>
            </body>
          </body>
        </body>
      </body>
    </body>
  </worldbody>

  <sensor>
    <subtreelinvel name="torso_subtreelinvel" body="torso"/>
    <touch name="touch_toe" site="touch_toe"/>
    <touch name="touch_heel" site="touch_heel"/>
  </sensor>

  <actuator>
    <motor name="waist" joint="waist" gear="30"/>
    <motor name="hip" joint="hip" gear="40"/>
    <motor name="knee" joint="knee" gear="30"/>
    <motor name="ankle" joint="ankle" gear="10"/>
  </actuator>
</mujoco>
211
Dreamer/local_dm_control_suite/humanoid.py
Executable file
@ -0,0 +1,211 @@
# Copyright 2017 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Humanoid Domain."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections

from dm_control import mujoco
from dm_control.rl import control
from local_dm_control_suite import base
from local_dm_control_suite import common
from dm_control.suite.utils import randomizers
from dm_control.utils import containers
from dm_control.utils import rewards
import numpy as np

_DEFAULT_TIME_LIMIT = 25
_CONTROL_TIMESTEP = .025

# Height of head above which stand reward is 1.
_STAND_HEIGHT = 1.4

# Horizontal speeds above which move reward is 1.
_WALK_SPEED = 1
_RUN_SPEED = 10


SUITE = containers.TaggedTasks()


def get_model_and_assets():
  """Returns a tuple containing the model XML string and a dict of assets."""
  return common.read_model('humanoid.xml'), common.ASSETS


@SUITE.add('benchmarking')
def stand(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
  """Returns the Stand task."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = Humanoid(move_speed=0, pure_state=False, random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
      **environment_kwargs)


@SUITE.add('benchmarking')
def walk(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
  """Returns the Walk task."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = Humanoid(move_speed=_WALK_SPEED, pure_state=False, random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
      **environment_kwargs)


@SUITE.add('benchmarking')
def run(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
  """Returns the Run task."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = Humanoid(move_speed=_RUN_SPEED, pure_state=False, random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
      **environment_kwargs)


@SUITE.add()
def run_pure_state(time_limit=_DEFAULT_TIME_LIMIT, random=None,
                   environment_kwargs=None):
  """Returns the Run task with pure-state observations."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = Humanoid(move_speed=_RUN_SPEED, pure_state=True, random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
      **environment_kwargs)


class Physics(mujoco.Physics):
  """Physics simulation with additional features for the Humanoid domain."""

  def torso_upright(self):
    """Returns projection from z-axes of torso to the z-axes of world."""
    return self.named.data.xmat['torso', 'zz']

  def head_height(self):
    """Returns the height of the head."""
    return self.named.data.xpos['head', 'z']

  def center_of_mass_position(self):
    """Returns position of the center-of-mass."""
    return self.named.data.subtree_com['torso'].copy()

  def center_of_mass_velocity(self):
    """Returns the velocity of the center-of-mass."""
    return self.named.data.sensordata['torso_subtreelinvel'].copy()

  def torso_vertical_orientation(self):
    """Returns the z-projection of the torso orientation matrix."""
    return self.named.data.xmat['torso', ['zx', 'zy', 'zz']]

  def joint_angles(self):
    """Returns the state without global orientation or position."""
    return self.data.qpos[7:].copy()  # Skip the 7 DoFs of the free root joint.

  def extremities(self):
    """Returns end effector positions in egocentric frame."""
    torso_frame = self.named.data.xmat['torso'].reshape(3, 3)
    torso_pos = self.named.data.xpos['torso']
    positions = []
    for side in ('left_', 'right_'):
      for limb in ('hand', 'foot'):
        torso_to_limb = self.named.data.xpos[side + limb] - torso_pos
        positions.append(torso_to_limb.dot(torso_frame))
    return np.hstack(positions)


class Humanoid(base.Task):
  """A humanoid task."""

  def __init__(self, move_speed, pure_state, random=None):
    """Initializes an instance of `Humanoid`.

    Args:
      move_speed: A float. If this value is zero, reward is given simply for
        standing up. Otherwise this specifies a target horizontal velocity for
        the walking task.
      pure_state: A bool. Whether the observations consist of the pure MuJoCo
        state or include some useful features thereof.
      random: Optional, either a `numpy.random.RandomState` instance, an
        integer seed for creating a new `RandomState`, or None to select a seed
        automatically (default).
    """
    self._move_speed = move_speed
    self._pure_state = pure_state
    super(Humanoid, self).__init__(random=random)

  def initialize_episode(self, physics):
    """Sets the state of the environment at the start of each episode.

    Args:
      physics: An instance of `Physics`.
    """
    # Find a collision-free random initial configuration.
    penetrating = True
    while penetrating:
      randomizers.randomize_limited_and_rotational_joints(physics, self.random)
      # Check for collisions.
      physics.after_reset()
      penetrating = physics.data.ncon > 0
    super(Humanoid, self).initialize_episode(physics)

  def get_observation(self, physics):
    """Returns either the pure state or a set of egocentric features."""
    obs = collections.OrderedDict()
    if self._pure_state:
      obs['position'] = physics.position()
      obs['velocity'] = physics.velocity()
    else:
      obs['joint_angles'] = physics.joint_angles()
      obs['head_height'] = physics.head_height()
      obs['extremities'] = physics.extremities()
      obs['torso_vertical'] = physics.torso_vertical_orientation()
      obs['com_velocity'] = physics.center_of_mass_velocity()
      obs['velocity'] = physics.velocity()
    return obs

  def get_reward(self, physics):
    """Returns a reward to the agent."""
    standing = rewards.tolerance(physics.head_height(),
                                 bounds=(_STAND_HEIGHT, float('inf')),
                                 margin=_STAND_HEIGHT/4)
    upright = rewards.tolerance(physics.torso_upright(),
                                bounds=(0.9, float('inf')), sigmoid='linear',
                                margin=1.9, value_at_margin=0)
    stand_reward = standing * upright
    small_control = rewards.tolerance(physics.control(), margin=1,
                                      value_at_margin=0,
                                      sigmoid='quadratic').mean()
    small_control = (4 + small_control) / 5
    if self._move_speed == 0:
      horizontal_velocity = physics.center_of_mass_velocity()[[0, 1]]
      dont_move = rewards.tolerance(horizontal_velocity, margin=2).mean()
      return small_control * stand_reward * dont_move
    else:
      com_velocity = np.linalg.norm(physics.center_of_mass_velocity()[[0, 1]])
      move = rewards.tolerance(com_velocity,
                               bounds=(self._move_speed, float('inf')),
                               margin=self._move_speed, value_at_margin=0,
                               sigmoid='linear')
      move = (5*move + 1) / 6
      return small_control * stand_reward * move
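
As with the other domains, the registered constructors compose into environments directly; a short sketch of the feature-based observation (pure_state=False):

# Sketch: observation keys of the feature-based humanoid tasks.
from local_dm_control_suite import humanoid

env = humanoid.walk()
time_step = env.reset()
print(list(time_step.observation))
# Expected: ['joint_angles', 'head_height', 'extremities',
#            'torso_vertical', 'com_velocity', 'velocity']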
202
Dreamer/local_dm_control_suite/humanoid.xml
Executable file
202
Dreamer/local_dm_control_suite/humanoid.xml
Executable file
@ -0,0 +1,202 @@
|
|||||||
|
<mujoco model="humanoid">
|
||||||
|
<include file="./common/skybox.xml"/>
|
||||||
|
<include file="./common/visual.xml"/>
|
||||||
|
<include file="./common/materials.xml"/>
|
||||||
|
|
||||||
|
<statistic extent="2" center="0 0 1"/>
|
||||||
|
|
||||||
|
<option timestep=".005"/>
|
||||||
|
|
||||||
|
<default>
|
||||||
|
<motor ctrlrange="-1 1" ctrllimited="true"/>
|
||||||
|
<default class="body">
|
||||||
|
<geom type="capsule" condim="1" friction=".7" solimp=".9 .99 .003" solref=".015 1" material="self"/>
|
||||||
|
<joint type="hinge" damping=".2" stiffness="1" armature=".01" limited="true" solimplimit="0 .99 .01"/>
|
||||||
|
<default class="big_joint">
|
||||||
|
<joint damping="5" stiffness="10"/>
|
||||||
|
<default class="big_stiff_joint">
|
||||||
|
<joint stiffness="20"/>
|
||||||
|
</default>
|
||||||
|
</default>
|
||||||
|
<site size=".04" group="3"/>
|
||||||
|
<default class="force-torque">
|
||||||
|
<site type="box" size=".01 .01 .02" rgba="1 0 0 1" />
|
||||||
|
</default>
|
||||||
|
<default class="touch">
|
||||||
|
<site type="capsule" rgba="0 0 1 .3"/>
|
||||||
|
</default>
|
||||||
|
</default>
|
||||||
|
</default>
|
||||||
|
|
||||||
|
<worldbody>
|
||||||
|
<geom name="floor" type="plane" conaffinity="1" size="100 100 .2" material="grid"/>
|
||||||
|
<body name="torso" pos="0 0 1.5" childclass="body">
|
||||||
|
<light name="top" pos="0 0 2" mode="trackcom"/>
|
||||||
|
<camera name="back" pos="-3 0 1" xyaxes="0 -1 0 1 0 2" mode="trackcom"/>
|
||||||
|
<camera name="side" pos="0 -3 1" xyaxes="1 0 0 0 1 2" mode="trackcom"/>
|
||||||
|
<freejoint name="root"/>
|
||||||
|
<site name="root" class="force-torque"/>
|
||||||
|
<geom name="torso" fromto="0 -.07 0 0 .07 0" size=".07"/>
|
||||||
|
<geom name="upper_waist" fromto="-.01 -.06 -.12 -.01 .06 -.12" size=".06"/>
|
||||||
|
<site name="torso" class="touch" type="box" pos="0 0 -.05" size=".075 .14 .13"/>
|
||||||
|
<body name="head" pos="0 0 .19">
|
||||||
|
<geom name="head" type="sphere" size=".09"/>
|
||||||
|
<site name="head" class="touch" type="sphere" size=".091"/>
|
||||||
|
<camera name="egocentric" pos=".09 0 0" xyaxes="0 -1 0 .1 0 1" fovy="80"/>
|
||||||
|
</body>
|
||||||
|
<body name="lower_waist" pos="-.01 0 -.260" quat="1.000 0 -.002 0">
|
||||||
|
<geom name="lower_waist" fromto="0 -.06 0 0 .06 0" size=".06"/>
|
||||||
|
<site name="lower_waist" class="touch" size=".061 .06" zaxis="0 1 0"/>
|
||||||
|
<joint name="abdomen_z" pos="0 0 .065" axis="0 0 1" range="-45 45" class="big_stiff_joint"/>
|
||||||
|
<joint name="abdomen_y" pos="0 0 .065" axis="0 1 0" range="-75 30" class="big_joint"/>
|
||||||
|
<body name="pelvis" pos="0 0 -.165" quat="1.000 0 -.002 0">
|
||||||
|
<joint name="abdomen_x" pos="0 0 .1" axis="1 0 0" range="-35 35" class="big_joint"/>
|
||||||
|
<geom name="butt" fromto="-.02 -.07 0 -.02 .07 0" size=".09"/>
|
||||||
|
<site name="butt" class="touch" size=".091 .07" pos="-.02 0 0" zaxis="0 1 0"/>
|
||||||
|
<body name="right_thigh" pos="0 -.1 -.04">
|
||||||
|
<site name="right_hip" class="force-torque"/>
|
||||||
|
<joint name="right_hip_x" axis="1 0 0" range="-25 5" class="big_joint"/>
|
||||||
|
<joint name="right_hip_z" axis="0 0 1" range="-60 35" class="big_joint"/>
|
||||||
|
<joint name="right_hip_y" axis="0 1 0" range="-110 20" class="big_stiff_joint"/>
|
||||||
|
<geom name="right_thigh" fromto="0 0 0 0 .01 -.34" size=".06"/>
|
||||||
|
<site name="right_thigh" class="touch" pos="0 .005 -.17" size=".061 .17" zaxis="0 -1 34"/>
|
||||||
|
<body name="right_shin" pos="0 .01 -.403">
|
||||||
|
<site name="right_knee" class="force-torque" pos="0 0 .02"/>
|
||||||
|
<joint name="right_knee" pos="0 0 .02" axis="0 -1 0" range="-160 2"/>
|
||||||
|
<geom name="right_shin" fromto="0 0 0 0 0 -.3" size=".049"/>
|
||||||
|
<site name="right_shin" class="touch" pos="0 0 -.15" size=".05 .15"/>
|
||||||
|
<body name="right_foot" pos="0 0 -.39">
|
||||||
|
<site name="right_ankle" class="force-torque"/>
|
||||||
|
<joint name="right_ankle_y" pos="0 0 .08" axis="0 1 0" range="-50 50" stiffness="6"/>
|
||||||
|
<joint name="right_ankle_x" pos="0 0 .04" axis="1 0 .5" range="-50 50" stiffness="3"/>
|
||||||
|
<geom name="right_right_foot" fromto="-.07 -.02 0 .14 -.04 0" size=".027"/>
|
||||||
|
<geom name="left_right_foot" fromto="-.07 0 0 .14 .02 0" size=".027"/>
|
||||||
|
<site name="right_right_foot" class="touch" pos=".035 -.03 0" size=".03 .11" zaxis="21 -2 0"/>
|
||||||
|
<site name="left_right_foot" class="touch" pos=".035 .01 0" size=".03 .11" zaxis="21 2 0"/>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
<body name="left_thigh" pos="0 .1 -.04">
|
||||||
|
<site name="left_hip" class="force-torque"/>
|
||||||
|
<joint name="left_hip_x" axis="-1 0 0" range="-25 5" class="big_joint"/>
|
||||||
|
<joint name="left_hip_z" axis="0 0 -1" range="-60 35" class="big_joint"/>
|
||||||
|
<joint name="left_hip_y" axis="0 1 0" range="-120 20" class="big_stiff_joint"/>
|
||||||
|
<geom name="left_thigh" fromto="0 0 0 0 -.01 -.34" size=".06"/>
|
||||||
|
<site name="left_thigh" class="touch" pos="0 -.005 -.17" size=".061 .17" zaxis="0 1 34"/>
|
||||||
|
<body name="left_shin" pos="0 -.01 -.403">
|
||||||
|
<site name="left_knee" class="force-torque" pos="0 0 .02"/>
|
||||||
|
<joint name="left_knee" pos="0 0 .02" axis="0 -1 0" range="-160 2"/>
|
||||||
|
<geom name="left_shin" fromto="0 0 0 0 0 -.3" size=".049"/>
|
||||||
|
<site name="left_shin" class="touch" pos="0 0 -.15" size=".05 .15"/>
|
||||||
|
<body name="left_foot" pos="0 0 -.39">
|
||||||
|
<site name="left_ankle" class="force-torque"/>
|
||||||
|
<joint name="left_ankle_y" pos="0 0 .08" axis="0 1 0" range="-50 50" stiffness="6"/>
|
||||||
|
<joint name="left_ankle_x" pos="0 0 .04" axis="1 0 .5" range="-50 50" stiffness="3"/>
|
||||||
|
<geom name="left_left_foot" fromto="-.07 .02 0 .14 .04 0" size=".027"/>
|
||||||
|
<geom name="right_left_foot" fromto="-.07 0 0 .14 -.02 0" size=".027"/>
|
||||||
|
<site name="right_left_foot" class="touch" pos=".035 -.01 0" size=".03 .11" zaxis="21 -2 0"/>
|
||||||
|
<site name="left_left_foot" class="touch" pos=".035 .03 0" size=".03 .11" zaxis="21 2 0"/>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
<body name="right_upper_arm" pos="0 -.17 .06">
|
||||||
|
<joint name="right_shoulder1" axis="2 1 1" range="-85 60"/>
|
||||||
|
<joint name="right_shoulder2" axis="0 -1 1" range="-85 60"/>
|
||||||
|
<geom name="right_upper_arm" fromto="0 0 0 .16 -.16 -.16" size=".04 .16"/>
|
||||||
|
<site name="right_upper_arm" class="touch" pos=".08 -.08 -.08" size=".041 .14" zaxis="1 -1 -1"/>
|
||||||
|
<body name="right_lower_arm" pos=".18 -.18 -.18">
|
||||||
|
<joint name="right_elbow" axis="0 -1 1" range="-90 50" stiffness="0"/>
|
||||||
|
<geom name="right_lower_arm" fromto=".01 .01 .01 .17 .17 .17" size=".031"/>
|
||||||
|
<site name="right_lower_arm" class="touch" pos=".09 .09 .09" size=".032 .14" zaxis="1 1 1"/>
|
||||||
|
<body name="right_hand" pos=".18 .18 .18">
|
||||||
|
<geom name="right_hand" type="sphere" size=".04"/>
|
||||||
|
<site name="right_hand" class="touch" type="sphere" size=".041"/>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
<body name="left_upper_arm" pos="0 .17 .06">
|
||||||
|
<joint name="left_shoulder1" axis="2 -1 1" range="-60 85"/>
|
||||||
|
<joint name="left_shoulder2" axis="0 1 1" range="-60 85"/>
|
||||||
|
<geom name="left_upper_arm" fromto="0 0 0 .16 .16 -.16" size=".04 .16"/>
|
||||||
|
<site name="left_upper_arm" class="touch" pos=".08 .08 -.08" size=".041 .14" zaxis="1 1 -1"/>
|
||||||
|
<body name="left_lower_arm" pos=".18 .18 -.18">
|
||||||
|
<joint name="left_elbow" axis="0 -1 -1" range="-90 50" stiffness="0"/>
|
||||||
|
<geom name="left_lower_arm" fromto=".01 -.01 .01 .17 -.17 .17" size=".031"/>
|
||||||
|
<site name="left_lower_arm" class="touch" pos=".09 -.09 .09" size=".032 .14" zaxis="1 -1 1"/>
|
||||||
|
<body name="left_hand" pos=".18 -.18 .18">
|
||||||
|
<geom name="left_hand" type="sphere" size=".04"/>
|
||||||
|
<site name="left_hand" class="touch" type="sphere" size=".041"/>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</worldbody>
|
||||||
|
|
||||||
|
<actuator>
|
||||||
|
<motor name="abdomen_y" gear="40" joint="abdomen_y"/>
|
||||||
|
<motor name="abdomen_z" gear="40" joint="abdomen_z"/>
|
||||||
|
<motor name="abdomen_x" gear="40" joint="abdomen_x"/>
|
||||||
|
<motor name="right_hip_x" gear="40" joint="right_hip_x"/>
|
||||||
|
<motor name="right_hip_z" gear="40" joint="right_hip_z"/>
|
||||||
|
<motor name="right_hip_y" gear="120" joint="right_hip_y"/>
|
||||||
|
<motor name="right_knee" gear="80" joint="right_knee"/>
|
||||||
|
<motor name="right_ankle_x" gear="20" joint="right_ankle_x"/>
|
||||||
|
<motor name="right_ankle_y" gear="20" joint="right_ankle_y"/>
|
||||||
|
<motor name="left_hip_x" gear="40" joint="left_hip_x"/>
|
||||||
|
<motor name="left_hip_z" gear="40" joint="left_hip_z"/>
|
||||||
|
<motor name="left_hip_y" gear="120" joint="left_hip_y"/>
|
||||||
|
<motor name="left_knee" gear="80" joint="left_knee"/>
|
||||||
|
<motor name="left_ankle_x" gear="20" joint="left_ankle_x"/>
|
||||||
|
<motor name="left_ankle_y" gear="20" joint="left_ankle_y"/>
|
||||||
|
<motor name="right_shoulder1" gear="20" joint="right_shoulder1"/>
|
||||||
|
<motor name="right_shoulder2" gear="20" joint="right_shoulder2"/>
|
||||||
|
<motor name="right_elbow" gear="40" joint="right_elbow"/>
|
||||||
|
<motor name="left_shoulder1" gear="20" joint="left_shoulder1"/>
|
||||||
|
<motor name="left_shoulder2" gear="20" joint="left_shoulder2"/>
|
||||||
|
<motor name="left_elbow" gear="40" joint="left_elbow"/>
|
||||||
|
</actuator>
|
||||||
|
|
||||||
|
<sensor>
|
||||||
|
<subtreelinvel name="torso_subtreelinvel" body="torso"/>
|
||||||
|
<accelerometer name="torso_accel" site="root"/>
|
||||||
|
<velocimeter name="torso_vel" site="root"/>
|
||||||
|
<gyro name="torso_gyro" site="root"/>
|
||||||
|
|
||||||
|
<force name="left_ankle_force" site="left_ankle"/>
|
||||||
|
<force name="right_ankle_force" site="right_ankle"/>
|
||||||
|
<force name="left_knee_force" site="left_knee"/>
|
||||||
|
<force name="right_knee_force" site="right_knee"/>
|
||||||
|
<force name="left_hip_force" site="left_hip"/>
|
||||||
|
<force name="right_hip_force" site="right_hip"/>
|
||||||
|
|
||||||
|
<torque name="left_ankle_torque" site="left_ankle"/>
|
||||||
|
<torque name="right_ankle_torque" site="right_ankle"/>
|
||||||
|
<torque name="left_knee_torque" site="left_knee"/>
|
||||||
|
<torque name="right_knee_torque" site="right_knee"/>
|
||||||
|
<torque name="left_hip_torque" site="left_hip"/>
|
||||||
|
<torque name="right_hip_torque" site="right_hip"/>
|
||||||
|
|
||||||
|
<touch name="torso_touch" site="torso"/>
|
||||||
|
<touch name="head_touch" site="head"/>
|
||||||
|
<touch name="lower_waist_touch" site="lower_waist"/>
|
||||||
|
<touch name="butt_touch" site="butt"/>
|
||||||
|
<touch name="right_thigh_touch" site="right_thigh"/>
|
||||||
|
<touch name="right_shin_touch" site="right_shin"/>
|
||||||
|
<touch name="right_right_foot_touch" site="right_right_foot"/>
|
||||||
|
<touch name="left_right_foot_touch" site="left_right_foot"/>
|
||||||
|
<touch name="left_thigh_touch" site="left_thigh"/>
|
||||||
|
<touch name="left_shin_touch" site="left_shin"/>
|
||||||
|
<touch name="right_left_foot_touch" site="right_left_foot"/>
|
||||||
|
<touch name="left_left_foot_touch" site="left_left_foot"/>
|
||||||
|
<touch name="right_upper_arm_touch" site="right_upper_arm"/>
|
||||||
|
<touch name="right_lower_arm_touch" site="right_lower_arm"/>
|
||||||
|
<touch name="right_hand_touch" site="right_hand"/>
|
||||||
|
<touch name="left_upper_arm_touch" site="left_upper_arm"/>
|
||||||
|
<touch name="left_lower_arm_touch" site="left_lower_arm"/>
|
||||||
|
<touch name="left_hand_touch" site="left_hand"/>
|
||||||
|
</sensor>
|
||||||
|
|
||||||
|
</mujoco>
179
Dreamer/local_dm_control_suite/humanoid_CMU.py
Executable file
@ -0,0 +1,179 @@
# Copyright 2017 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Humanoid_CMU Domain."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections

from dm_control import mujoco
from dm_control.rl import control
from local_dm_control_suite import base
from local_dm_control_suite import common
from dm_control.suite.utils import randomizers
from dm_control.utils import containers
from dm_control.utils import rewards
import numpy as np

_DEFAULT_TIME_LIMIT = 20
_CONTROL_TIMESTEP = 0.02

# Height of head above which stand reward is 1.
_STAND_HEIGHT = 1.4

# Horizontal speeds above which move reward is 1.
_WALK_SPEED = 1
_RUN_SPEED = 10

SUITE = containers.TaggedTasks()


def get_model_and_assets():
  """Returns a tuple containing the model XML string and a dict of assets."""
  return common.read_model('humanoid_CMU.xml'), common.ASSETS


@SUITE.add()
def stand(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
  """Returns the Stand task."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = HumanoidCMU(move_speed=0, random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
      **environment_kwargs)


@SUITE.add()
def run(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
  """Returns the Run task."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = HumanoidCMU(move_speed=_RUN_SPEED, random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
      **environment_kwargs)


class Physics(mujoco.Physics):
  """Physics simulation with additional features for the humanoid_CMU domain."""

  def thorax_upright(self):
    """Returns the projection from the y-axis of the thorax to the z-axis of the world."""
    return self.named.data.xmat['thorax', 'zy']

  def head_height(self):
    """Returns the height of the head."""
    return self.named.data.xpos['head', 'z']

  def center_of_mass_position(self):
    """Returns the position of the center-of-mass."""
    return self.named.data.subtree_com['thorax']

  def center_of_mass_velocity(self):
    """Returns the velocity of the center-of-mass."""
    return self.named.data.sensordata['thorax_subtreelinvel'].copy()

  def torso_vertical_orientation(self):
    """Returns the z-projection of the thorax orientation matrix."""
    return self.named.data.xmat['thorax', ['zx', 'zy', 'zz']]

  def joint_angles(self):
    """Returns the state without global orientation or position."""
    return self.data.qpos[7:].copy()  # Skip the 7 DoFs of the free root joint.

  def extremities(self):
    """Returns end effector positions in egocentric frame."""
    torso_frame = self.named.data.xmat['thorax'].reshape(3, 3)
    torso_pos = self.named.data.xpos['thorax']
    positions = []
    for side in ('l', 'r'):
      for limb in ('hand', 'foot'):
        torso_to_limb = self.named.data.xpos[side + limb] - torso_pos
        positions.append(torso_to_limb.dot(torso_frame))
    return np.hstack(positions)


class HumanoidCMU(base.Task):
  """A task for the CMU Humanoid."""

  def __init__(self, move_speed, random=None):
    """Initializes an instance of `HumanoidCMU`.

    Args:
      move_speed: A float. If this value is zero, reward is given simply for
        standing up. Otherwise this specifies a target horizontal velocity for
        the walking task.
      random: Optional, either a `numpy.random.RandomState` instance, an
        integer seed for creating a new `RandomState`, or None to select a seed
        automatically (default).
    """
    self._move_speed = move_speed
    super(HumanoidCMU, self).__init__(random=random)

  def initialize_episode(self, physics):
    """Sets a random collision-free configuration at the start of each episode.

    Args:
      physics: An instance of `Physics`.
    """
    penetrating = True
    while penetrating:
      randomizers.randomize_limited_and_rotational_joints(
          physics, self.random)
      # Check for collisions.
      physics.after_reset()
      penetrating = physics.data.ncon > 0
    super(HumanoidCMU, self).initialize_episode(physics)

  def get_observation(self, physics):
    """Returns a set of egocentric features."""
    obs = collections.OrderedDict()
    obs['joint_angles'] = physics.joint_angles()
    obs['head_height'] = physics.head_height()
    obs['extremities'] = physics.extremities()
    obs['torso_vertical'] = physics.torso_vertical_orientation()
    obs['com_velocity'] = physics.center_of_mass_velocity()
    obs['velocity'] = physics.velocity()
    return obs

  def get_reward(self, physics):
    """Returns a reward to the agent."""
    standing = rewards.tolerance(physics.head_height(),
                                 bounds=(_STAND_HEIGHT, float('inf')),
                                 margin=_STAND_HEIGHT/4)
    upright = rewards.tolerance(physics.thorax_upright(),
                                bounds=(0.9, float('inf')), sigmoid='linear',
                                margin=1.9, value_at_margin=0)
    stand_reward = standing * upright
    small_control = rewards.tolerance(physics.control(), margin=1,
                                      value_at_margin=0,
                                      sigmoid='quadratic').mean()
    small_control = (4 + small_control) / 5
    if self._move_speed == 0:
      horizontal_velocity = physics.center_of_mass_velocity()[[0, 1]]
      dont_move = rewards.tolerance(horizontal_velocity, margin=2).mean()
      return small_control * stand_reward * dont_move
    else:
      com_velocity = np.linalg.norm(physics.center_of_mass_velocity()[[0, 1]])
      move = rewards.tolerance(com_velocity,
                               bounds=(self._move_speed, float('inf')),
                               margin=self._move_speed, value_at_margin=0,
                               sigmoid='linear')
      move = (5*move + 1) / 6
      return small_control * stand_reward * move
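
A minimal usage sketch for the tasks registered above, assuming dm_control is installed and the package is importable as `local_dm_control_suite`; the uniform random actions are a placeholder for a real policy:

import numpy as np
from local_dm_control_suite import humanoid_CMU

env = humanoid_CMU.stand()  # task registered above via @SUITE.add()
spec = env.action_spec()
time_step = env.reset()
while not time_step.last():
  action = np.random.uniform(spec.minimum, spec.maximum, spec.shape)
  time_step = env.step(action)  # observation is the OrderedDict from get_observation
print(sorted(time_step.observation.keys()))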
289
Dreamer/local_dm_control_suite/humanoid_CMU.xml
Executable file
@ -0,0 +1,289 @@
<mujoco model="humanoid_CMU">
|
||||||
|
|
||||||
|
<include file="./common/skybox.xml"/>
|
||||||
|
<include file="./common/visual.xml"/>
|
||||||
|
<include file="./common/materials.xml"/>
|
||||||
|
|
||||||
|
<statistic extent="2" center="0 0 1"/>
|
||||||
|
|
||||||
|
<default class="main">
|
||||||
|
<joint limited="true" solimplimit="0 0.99 0.01" stiffness="0.1" armature=".01" damping="1"/>
|
||||||
|
<geom friction="0.7" solref="0.015 1" solimp="0.95 0.99 0.003"/>
|
||||||
|
<motor ctrllimited="true" ctrlrange="-1 1"/>
|
||||||
|
<default class="humanoid">
|
||||||
|
<geom type="capsule" material="self"/>
|
||||||
|
<default class="stiff_low">
|
||||||
|
<joint stiffness=".5" damping="4"/>
|
||||||
|
</default>
|
||||||
|
<default class="stiff_medium">
|
||||||
|
<joint stiffness="10" damping="5"/>
|
||||||
|
</default>
|
||||||
|
<default class="stiff_high">
|
||||||
|
<joint stiffness="30" damping="10"/>
|
||||||
|
</default>
|
||||||
|
<default class="touch">
|
||||||
|
<site group="3" rgba="0 0 1 .5"/>
|
||||||
|
</default>
|
||||||
|
</default>
|
||||||
|
</default>
|
||||||
|
|
||||||
|
<worldbody>
|
||||||
|
<geom name="floor" type="plane" conaffinity="1" size="100 100 .2" material="grid"/>
|
||||||
|
<light name="tracking_light" pos="0 0 7" dir="0 0 -1" mode="trackcom"/>
|
||||||
|
<camera name="back" pos="0 3 2.4" xyaxes="-1 0 0 0 -1 2" mode="trackcom"/>
|
||||||
|
<camera name="side" pos="-3 0 2.4" xyaxes="0 -1 0 1 0 2" mode="trackcom"/>
|
||||||
|
<body name="root" childclass="humanoid" pos="0 0 1" euler="90 0 0">
|
||||||
|
<site name="root" size=".01" rgba="0.5 0.5 0.5 0"/>
|
||||||
|
<freejoint name="root"/>
|
||||||
|
<geom name="root_geom" size="0.09 0.06" pos="0 -0.05 0" quat="1 0 -1 0"/>
|
||||||
|
<body name="lhipjoint">
|
||||||
|
<geom name="lhipjoint" size="0.008 0.022" pos="0.051 -0.046 0.025" quat="0.5708 -0.566602 -0.594264 0"/>
|
||||||
|
<body name="lfemur" pos="0.102 -0.092 0.05" quat="1 0 0 0.17365">
|
||||||
|
<joint name="lfemurrz" axis="0 0 1" range="-60 70" class="stiff_medium"/>
|
||||||
|
<joint name="lfemurry" axis="0 1 0" range="-70 70" class="stiff_medium"/>
|
||||||
|
<joint name="lfemurrx" axis="1 0 0" range="-160 20" class="stiff_medium"/>
|
||||||
|
<geom name="lfemur" size="0.06 0.17" pos="-.01 -0.202473 0" quat="0.7 -0.7 -0.1228 -0.07"/>
|
||||||
|
<body name="ltibia" pos="0 -0.404945 0">
|
||||||
|
<joint name="ltibiarx" axis="1 0 0" range="1 170" class="stiff_low"/>
|
||||||
|
<geom name="ltibia" size="0.03 0.1825614" pos="0 -0.202846 0" quat="0.7 -0.7 -0.1228 -0.1228"/>
|
||||||
|
<geom name="lcalf" size="0.045 0.08" pos="0 -0.1 -.01" quat="0.7 -0.7 -0.1228 -0.1228"/>
|
||||||
|
<body name="lfoot" pos="0 -0.405693 0" quat="0.707107 -0.707107 0 0">
|
||||||
|
<site name="lfoot_touch" type="box" pos="-.005 -.02 -0.025" size=".04 .08 .02" euler="10 0 0" class="touch"/>
|
||||||
|
<joint name="lfootrz" axis="0 0 1" range="-70 20" class="stiff_medium"/>
|
||||||
|
<joint name="lfootrx" axis="1 0 0" range="-45 90" class="stiff_medium"/>
|
||||||
|
<geom name="lfoot0" size="0.02 0.06" pos="-0.02 -0.023 -0.01" euler="100 -2 0"/>
|
||||||
|
<geom name="lfoot1" size="0.02 0.06" pos="0 -0.023 -0.01" euler="100 0 0"/>
|
||||||
|
<geom name="lfoot2" size="0.02 0.06" pos=".01 -0.023 -0.01" euler="100 10 0"/>
|
||||||
|
<body name="ltoes" pos="0 -0.106372 -0.0227756">
|
||||||
|
<joint name="ltoesrx" axis="1 0 0" range="-90 20"/>
|
||||||
|
<geom name="ltoes0" type="sphere" size="0.02" pos="-.025 -0.01 -.01"/>
|
||||||
|
<geom name="ltoes1" type="sphere" size="0.02" pos="0 -0.005 -.01"/>
|
||||||
|
<geom name="ltoes2" type="sphere" size="0.02" pos=".02 .001 -.01"/>
|
||||||
|
<site name="ltoes_touch" type="capsule" pos="-.005 -.005 -.01" size="0.025 0.02" zaxis="1 .2 0" class="touch"/>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
<body name="rhipjoint">
|
||||||
|
<geom name="rhipjoint" size="0.008 0.022" pos="-0.051 -0.046 0.025" quat="0.574856 -0.547594 0.608014 0"/>
|
||||||
|
<body name="rfemur" pos="-0.102 -0.092 0.05" quat="1 0 0 -0.17365">
|
||||||
|
<joint name="rfemurrz" axis="0 0 1" range="-70 60" class="stiff_medium"/>
|
||||||
|
<joint name="rfemurry" axis="0 1 0" range="-70 70" class="stiff_medium"/>
|
||||||
|
<joint name="rfemurrx" axis="1 0 0" range="-160 20" class="stiff_medium"/>
|
||||||
|
<geom name="rfemur" size="0.06 0.17" pos=".01 -0.202473 0" quat="0.7 -0.7 0.1228 0.07"/>
|
||||||
|
<body name="rtibia" pos="0 -0.404945 0">
|
||||||
|
<joint name="rtibiarx" axis="1 0 0" range="1 170" class="stiff_low"/>
|
||||||
|
<geom name="rtibia" size="0.03 0.1825614" pos="0 -0.202846 0" quat="0.7 -0.7 0.1228 0.1228"/>
|
||||||
|
<geom name="rcalf" size="0.045 0.08" pos="0 -0.1 -.01" quat="0.7 -0.7 -0.1228 -0.1228"/>
|
||||||
|
<body name="rfoot" pos="0 -0.405693 0" quat="0.707107 -0.707107 0 0">
|
||||||
|
<site name="rfoot_touch" type="box" pos=".005 -.02 -0.025" size=".04 .08 .02" euler="10 0 0" class="touch"/>
|
||||||
|
<joint name="rfootrz" axis="0 0 1" range="-20 70" class="stiff_medium"/>
|
||||||
|
<joint name="rfootrx" axis="1 0 0" range="-45 90" class="stiff_medium"/>
|
||||||
|
<geom name="rfoot0" size="0.02 0.06" pos="0.02 -0.023 -0.01" euler="100 2 0"/>
|
||||||
|
<geom name="rfoot1" size="0.02 0.06" pos="0 -0.023 -0.01" euler="100 0 0"/>
|
||||||
|
<geom name="rfoot2" size="0.02 0.06" pos="-.01 -0.023 -0.01" euler="100 -10 0"/>
|
||||||
|
<body name="rtoes" pos="0 -0.106372 -0.0227756">
|
||||||
|
<joint name="rtoesrx" axis="1 0 0" range="-90 20"/>
|
||||||
|
<geom name="rtoes0" type="sphere" size="0.02" pos=".025 -0.01 -.01"/>
|
||||||
|
<geom name="rtoes1" type="sphere" size="0.02" pos="0 -0.005 -.01"/>
|
||||||
|
<geom name="rtoes2" type="sphere" size="0.02" pos="-.02 .001 -.01"/>
|
||||||
|
<site name="rtoes_touch" type="capsule" pos=".005 -.005 -.01" size="0.025 0.02" zaxis="1 -.2 0" class="touch"/>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
<body name="lowerback">
|
||||||
|
<joint name="lowerbackrz" axis="0 0 1" range="-30 30" class="stiff_high"/>
|
||||||
|
<joint name="lowerbackry" axis="0 1 0" range="-30 30" class="stiff_high"/>
|
||||||
|
<joint name="lowerbackrx" axis="1 0 0" range="-20 45" class="stiff_high"/>
|
||||||
|
<geom name="lowerback" size="0.065 0.055" pos="0 0.056 .03" quat="1 0 1 0"/>
|
||||||
|
<body name="upperback" pos="0 0.1 -0.01">
|
||||||
|
<joint name="upperbackrz" axis="0 0 1" range="-30 30" class="stiff_high"/>
|
||||||
|
<joint name="upperbackry" axis="0 1 0" range="-30 30" class="stiff_high"/>
|
||||||
|
<joint name="upperbackrx" axis="1 0 0" range="-20 45" class="stiff_high"/>
|
||||||
|
<geom name="upperback" size="0.06 0.06" pos="0 0.06 0.02" quat="1 0 1 0"/>
|
||||||
|
<body name="thorax" pos="0.000512528 0.11356 0.000936821">
|
||||||
|
<joint name="thoraxrz" axis="0 0 1" range="-30 30" class="stiff_high"/>
|
||||||
|
<joint name="thoraxry" axis="0 1 0" range="-30 30" class="stiff_high"/>
|
||||||
|
<joint name="thoraxrx" axis="1 0 0" range="-20 45" class="stiff_high"/>
|
||||||
|
<geom name="thorax" size="0.08 0.07" pos="0 0.05 0" quat="1 0 1 0"/>
|
||||||
|
<body name="lowerneck" pos="0 0.113945 0.00468037">
|
||||||
|
<joint name="lowerneckrz" axis="0 0 1" range="-30 30" class="stiff_medium"/>
|
||||||
|
<joint name="lowerneckry" axis="0 1 0" range="-30 30" class="stiff_medium"/>
|
||||||
|
<joint name="lowerneckrx" axis="1 0 0" range="-20 45" class="stiff_medium"/>
|
||||||
|
<geom name="lowerneck" size="0.08 0.02" pos="0 0.04 -.02" quat="1 1 0 0"/>
|
||||||
|
<body name="upperneck" pos="0 0.09 0.01">
|
||||||
|
<joint name="upperneckrz" axis="0 0 1" range="-30 30" class="stiff_medium"/>
|
||||||
|
<joint name="upperneckry" axis="0 1 0" range="-30 30" class="stiff_medium"/>
|
||||||
|
<joint name="upperneckrx" axis="1 0 0" range="-20 45" class="stiff_medium"/>
|
||||||
|
<geom name="upperneck" size="0.05 0.03" pos="0 0.05 0" quat=".8 1 0 0"/>
|
||||||
|
<body name="head" pos="0 0.09 0">
|
||||||
|
<camera name="egocentric" pos="0 0 0" xyaxes="-1 0 0 0 1 0" fovy="80"/>
|
||||||
|
<joint name="headrz" axis="0 0 1" range="-30 30" class="stiff_medium"/>
|
||||||
|
<joint name="headry" axis="0 1 0" range="-30 30" class="stiff_medium"/>
|
||||||
|
<joint name="headrx" axis="1 0 0" range="-20 45" class="stiff_medium"/>
|
||||||
|
<geom name="head" size="0.085 0.035" pos="0 0.11 0.03" quat="1 .9 0 0"/>
|
||||||
|
<geom name="leye" type="sphere" size="0.02" pos=" .03 0.11 0.1"/>
|
||||||
|
<geom name="reye" type="sphere" size="0.02" pos="-.03 0.11 0.1"/>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
<body name="lclavicle" pos="0 0.113945 0.00468037">
|
||||||
|
<joint name="lclaviclerz" axis="0 0 1" range="0 20" class="stiff_high"/>
|
||||||
|
<joint name="lclaviclery" axis="0 1 0" range="-20 10" class="stiff_high"/>
|
||||||
|
<geom name="lclavicle" size="0.08 0.04" pos="0.09 0.05 -.01" quat="1 0 -1 -.4"/>
|
||||||
|
<body name="lhumerus" pos="0.183 0.076 0.01" quat="0.18 0.68 -0.68 0.18">
|
||||||
|
<joint name="lhumerusrz" axis="0 0 1" range="-90 90" class="stiff_low"/>
|
||||||
|
<joint name="lhumerusry" axis="0 1 0" range="-90 90" class="stiff_low"/>
|
||||||
|
<joint name="lhumerusrx" axis="1 0 0" range="-60 90" class="stiff_low"/>
|
||||||
|
<geom name="lhumerus" size="0.035 0.124" pos="0 -0.138 0" quat="0.612 -0.612 0.35 0.35"/>
|
||||||
|
<body name="lradius" pos="0 -0.277 0">
|
||||||
|
<joint name="lradiusrx" axis="1 0 0" range="-10 170" class="stiff_low"/>
|
||||||
|
<geom name="lradius" size="0.03 0.06" pos="0 -0.08 0" quat="0.612 -0.612 0.35 0.35"/>
|
||||||
|
<body name="lwrist" pos="0 -0.17 0" quat="-0.5 0 0.866 0">
|
||||||
|
<joint name="lwristry" axis="0 1 0" range="-180 0"/>
|
||||||
|
<geom name="lwrist" size="0.025 0.03" pos="0 -0.02 0" quat="0 0 -1 -1"/>
|
||||||
|
<body name="lhand" pos="0 -0.08 0">
|
||||||
|
<joint name="lhandrz" axis="0 0 1" range="-45 45"/>
|
||||||
|
<joint name="lhandrx" axis="1 0 0" range="-90 90"/>
|
||||||
|
<geom name="lhand" type="ellipsoid" size=".048 0.02 0.06" pos="0 -0.047 0" quat="0 0 -1 -1"/>
|
||||||
|
<body name="lfingers" pos="0 -0.08 0">
|
||||||
|
<joint name="lfingersrx" axis="1 0 0" range="0 90"/>
|
||||||
|
<geom name="lfinger0" size="0.01 0.04" pos="-.03 -0.05 0" quat="1 -1 0 0" />
|
||||||
|
<geom name="lfinger1" size="0.01 0.04" pos="-.008 -0.06 0" quat="1 -1 0 0" />
|
||||||
|
<geom name="lfinger2" size="0.009 0.04" pos=".014 -0.06 0" quat="1 -1 0 0" />
|
||||||
|
<geom name="lfinger3" size="0.008 0.04" pos=".032 -0.05 0" quat="1 -1 0 0" />
|
||||||
|
</body>
|
||||||
|
<body name="lthumb" pos="-.02 -.03 0" quat="0.92388 0 0 -0.382683">
|
||||||
|
<joint name="lthumbrz" axis="0 0 1" range="-45 45"/>
|
||||||
|
<joint name="lthumbrx" axis="1 0 0" range="0 90"/>
|
||||||
|
<geom name="lthumb" size="0.012 0.04" pos="0 -0.06 0" quat="0 0 -1 -1"/>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
<body name="rclavicle" pos="0 0.113945 0.00468037">
|
||||||
|
<joint name="rclaviclerz" axis="0 0 1" range="-20 0" class="stiff_high"/>
|
||||||
|
<joint name="rclaviclery" axis="0 1 0" range="-10 20" class="stiff_high"/>
|
||||||
|
<geom name="rclavicle" size="0.08 0.04" pos="-.09 0.05 -.01" quat="1 0 -1 .4"/>
|
||||||
|
<body name="rhumerus" pos="-0.183 0.076 0.01" quat="0.18 0.68 0.68 -0.18">
|
||||||
|
<joint name="rhumerusrz" axis="0 0 1" range="-90 90" class="stiff_low"/>
|
||||||
|
<joint name="rhumerusry" axis="0 1 0" range="-90 90" class="stiff_low"/>
|
||||||
|
<joint name="rhumerusrx" axis="1 0 0" range="-60 90" class="stiff_low"/>
|
||||||
|
<geom name="rhumerus" size="0.035 0.124" pos="0 -0.138 0" quat="0.61 -0.61 -0.35 -0.35"/>
|
||||||
|
<body name="rradius" pos="0 -0.277 0">
|
||||||
|
<joint name="rradiusrx" axis="1 0 0" range="-10 170" class="stiff_low"/>
|
||||||
|
<geom name="rradius" size="0.03 0.06" pos="0 -0.08 0" quat="0.612 -0.612 -0.35 -0.35"/>
|
||||||
|
<body name="rwrist" pos="0 -0.17 0" quat="-0.5 0 -0.866 0">
|
||||||
|
<joint name="rwristry" axis="0 1 0" range="-180 0"/>
|
||||||
|
<geom name="rwrist" size="0.025 0.03" pos="0 -0.02 0" quat="0 0 1 1"/>
|
||||||
|
<body name="rhand" pos="0 -0.08 0">
|
||||||
|
<joint name="rhandrz" axis="0 0 1" range="-45 45"/>
|
||||||
|
<joint name="rhandrx" axis="1 0 0" range="-90 90"/>
|
||||||
|
<geom name="rhand" type="ellipsoid" size=".048 0.02 .06" pos="0 -0.047 0" quat="0 0 1 1"/>
|
||||||
|
<body name="rfingers" pos="0 -0.08 0">
|
||||||
|
<joint name="rfingersrx" axis="1 0 0" range="0 90"/>
|
||||||
|
<geom name="rfinger0" size="0.01 0.04" pos=".03 -0.05 0" quat="1 -1 0 0" />
|
||||||
|
<geom name="rfinger1" size="0.01 0.04" pos=".008 -0.06 0" quat="1 -1 0 0" />
|
||||||
|
<geom name="rfinger2" size="0.009 0.04" pos="-.014 -0.06 0" quat="1 -1 0 0" />
|
||||||
|
<geom name="rfinger3" size="0.008 0.04" pos="-.032 -0.05 0" quat="1 -1 0 0" />
|
||||||
|
</body>
|
||||||
|
<body name="rthumb" pos=".02 -.03 0" quat="0.92388 0 0 0.382683">
|
||||||
|
<joint name="rthumbrz" axis="0 0 1" range="-45 45"/>
|
||||||
|
<joint name="rthumbrx" axis="1 0 0" range="0 90"/>
|
||||||
|
<geom name="rthumb" size="0.012 0.04" pos="0 -0.06 0" quat="0 0 1 1"/>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</worldbody>
|
||||||
|
|
||||||
|
<contact>
|
||||||
|
<exclude body1="lclavicle" body2="rclavicle"/>
|
||||||
|
<exclude body1="lowerneck" body2="lclavicle"/>
|
||||||
|
<exclude body1="lowerneck" body2="rclavicle"/>
|
||||||
|
<exclude body1="upperneck" body2="lclavicle"/>
|
||||||
|
<exclude body1="upperneck" body2="rclavicle"/>
|
||||||
|
</contact>
|
||||||
|
|
||||||
|
<actuator>
|
||||||
|
<motor name="headrx" joint="headrx" gear="20"/>
|
||||||
|
<motor name="headry" joint="headry" gear="20"/>
|
||||||
|
<motor name="headrz" joint="headrz" gear="20"/>
|
||||||
|
<motor name="lclaviclery" joint="lclaviclery" gear="20"/>
|
||||||
|
<motor name="lclaviclerz" joint="lclaviclerz" gear="20"/>
|
||||||
|
<motor name="lfemurrx" joint="lfemurrx" gear="120"/>
|
||||||
|
<motor name="lfemurry" joint="lfemurry" gear="40"/>
|
||||||
|
<motor name="lfemurrz" joint="lfemurrz" gear="40"/>
|
||||||
|
<motor name="lfingersrx" joint="lfingersrx" gear="20"/>
|
||||||
|
<motor name="lfootrx" joint="lfootrx" gear="20"/>
|
||||||
|
<motor name="lfootrz" joint="lfootrz" gear="20"/>
|
||||||
|
<motor name="lhandrx" joint="lhandrx" gear="20"/>
|
||||||
|
<motor name="lhandrz" joint="lhandrz" gear="20"/>
|
||||||
|
<motor name="lhumerusrx" joint="lhumerusrx" gear="40"/>
|
||||||
|
<motor name="lhumerusry" joint="lhumerusry" gear="40"/>
|
||||||
|
<motor name="lhumerusrz" joint="lhumerusrz" gear="40"/>
|
||||||
|
<motor name="lowerbackrx" joint="lowerbackrx" gear="40"/>
|
||||||
|
<motor name="lowerbackry" joint="lowerbackry" gear="40"/>
|
||||||
|
<motor name="lowerbackrz" joint="lowerbackrz" gear="40"/>
|
||||||
|
<motor name="lowerneckrx" joint="lowerneckrx" gear="20"/>
|
||||||
|
<motor name="lowerneckry" joint="lowerneckry" gear="20"/>
|
||||||
|
<motor name="lowerneckrz" joint="lowerneckrz" gear="20"/>
|
||||||
|
<motor name="lradiusrx" joint="lradiusrx" gear="40"/>
|
||||||
|
<motor name="lthumbrx" joint="lthumbrx" gear="20"/>
|
||||||
|
<motor name="lthumbrz" joint="lthumbrz" gear="20"/>
|
||||||
|
<motor name="ltibiarx" joint="ltibiarx" gear="80"/>
|
||||||
|
<motor name="ltoesrx" joint="ltoesrx" gear="20"/>
|
||||||
|
<motor name="lwristry" joint="lwristry" gear="20"/>
|
||||||
|
<motor name="rclaviclery" joint="rclaviclery" gear="20"/>
|
||||||
|
<motor name="rclaviclerz" joint="rclaviclerz" gear="20"/>
|
||||||
|
<motor name="rfemurrx" joint="rfemurrx" gear="120"/>
|
||||||
|
<motor name="rfemurry" joint="rfemurry" gear="40"/>
|
||||||
|
<motor name="rfemurrz" joint="rfemurrz" gear="40"/>
|
||||||
|
<motor name="rfingersrx" joint="rfingersrx" gear="20"/>
|
||||||
|
<motor name="rfootrx" joint="rfootrx" gear="20"/>
|
||||||
|
<motor name="rfootrz" joint="rfootrz" gear="20"/>
|
||||||
|
<motor name="rhandrx" joint="rhandrx" gear="20"/>
|
||||||
|
<motor name="rhandrz" joint="rhandrz" gear="20"/>
|
||||||
|
<motor name="rhumerusrx" joint="rhumerusrx" gear="40"/>
|
||||||
|
<motor name="rhumerusry" joint="rhumerusry" gear="40"/>
|
||||||
|
<motor name="rhumerusrz" joint="rhumerusrz" gear="40"/>
|
||||||
|
<motor name="rradiusrx" joint="rradiusrx" gear="40"/>
|
||||||
|
<motor name="rthumbrx" joint="rthumbrx" gear="20"/>
|
||||||
|
<motor name="rthumbrz" joint="rthumbrz" gear="20"/>
|
||||||
|
<motor name="rtibiarx" joint="rtibiarx" gear="80"/>
|
||||||
|
<motor name="rtoesrx" joint="rtoesrx" gear="20"/>
|
||||||
|
<motor name="rwristry" joint="rwristry" gear="20"/>
|
||||||
|
<motor name="thoraxrx" joint="thoraxrx" gear="40"/>
|
||||||
|
<motor name="thoraxry" joint="thoraxry" gear="40"/>
|
||||||
|
<motor name="thoraxrz" joint="thoraxrz" gear="40"/>
|
||||||
|
<motor name="upperbackrx" joint="upperbackrx" gear="40"/>
|
||||||
|
<motor name="upperbackry" joint="upperbackry" gear="40"/>
|
||||||
|
<motor name="upperbackrz" joint="upperbackrz" gear="40"/>
|
||||||
|
<motor name="upperneckrx" joint="upperneckrx" gear="20"/>
|
||||||
|
<motor name="upperneckry" joint="upperneckry" gear="20"/>
|
||||||
|
<motor name="upperneckrz" joint="upperneckrz" gear="20"/>
|
||||||
|
</actuator>
|
||||||
|
|
||||||
|
<sensor>
|
||||||
|
<subtreelinvel name="thorax_subtreelinvel" body="thorax"/>
|
||||||
|
<velocimeter name="sensor_root_veloc" site="root"/>
|
||||||
|
<gyro name="sensor_root_gyro" site="root"/>
|
||||||
|
<accelerometer name="sensor_root_accel" site="root"/>
|
||||||
|
<touch name="sensor_touch_ltoes" site="ltoes_touch"/>
|
||||||
|
<touch name="sensor_touch_rtoes" site="rtoes_touch"/>
|
||||||
|
<touch name="sensor_touch_rfoot" site="rfoot_touch"/>
|
||||||
|
<touch name="sensor_touch_lfoot" site="lfoot_touch"/>
|
||||||
|
</sensor>
|
||||||
|
|
||||||
|
</mujoco>
272
Dreamer/local_dm_control_suite/lqr.py
Executable file
@ -0,0 +1,272 @@
# Copyright 2017 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Procedurally generated LQR domain."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import os

from dm_control import mujoco
from dm_control.rl import control
from local_dm_control_suite import base
from local_dm_control_suite import common
from dm_control.utils import containers
from dm_control.utils import xml_tools
from lxml import etree
import numpy as np
from six.moves import range

from dm_control.utils import io as resources

_DEFAULT_TIME_LIMIT = float('inf')
_CONTROL_COST_COEF = 0.1
SUITE = containers.TaggedTasks()


def get_model_and_assets(n_bodies, n_actuators, random):
  """Returns the model description as an XML string and a dict of assets.

  Args:
    n_bodies: An int, number of bodies of the LQR.
    n_actuators: An int, number of actuated bodies of the LQR. `n_actuators`
      should be less than or equal to `n_bodies`.
    random: A `numpy.random.RandomState` instance.

  Returns:
    A tuple `(model_xml_string, assets)`, where `assets` is a dict consisting
    of `{filename: contents_string}` pairs.
  """
  return _make_model(n_bodies, n_actuators, random), common.ASSETS


@SUITE.add()
def lqr_2_1(time_limit=_DEFAULT_TIME_LIMIT, random=None,
            environment_kwargs=None):
  """Returns an LQR environment with 2 bodies of which the first is actuated."""
  return _make_lqr(n_bodies=2,
                   n_actuators=1,
                   control_cost_coef=_CONTROL_COST_COEF,
                   time_limit=time_limit,
                   random=random,
                   environment_kwargs=environment_kwargs)


@SUITE.add()
def lqr_6_2(time_limit=_DEFAULT_TIME_LIMIT, random=None,
            environment_kwargs=None):
  """Returns an LQR environment with 6 bodies of which the first 2 are actuated."""
  return _make_lqr(n_bodies=6,
                   n_actuators=2,
                   control_cost_coef=_CONTROL_COST_COEF,
                   time_limit=time_limit,
                   random=random,
                   environment_kwargs=environment_kwargs)


def _make_lqr(n_bodies, n_actuators, control_cost_coef, time_limit, random,
              environment_kwargs):
  """Returns an LQR environment.

  Args:
    n_bodies: An int, number of bodies of the LQR.
    n_actuators: An int, number of actuated bodies of the LQR. `n_actuators`
      should be less than or equal to `n_bodies`.
    control_cost_coef: A number, the coefficient of the control cost.
    time_limit: An int, maximum time for each episode in seconds.
    random: Either an existing `numpy.random.RandomState` instance, an
      integer seed for creating a new `RandomState`, or None to select a seed
      automatically.
    environment_kwargs: A `dict` specifying keyword arguments for the
      environment, or None.

  Returns:
    An LQR environment with `n_bodies` bodies of which the first `n_actuators`
    are actuated.
  """

  if not isinstance(random, np.random.RandomState):
    random = np.random.RandomState(random)

  model_string, assets = get_model_and_assets(n_bodies, n_actuators,
                                              random=random)
  physics = Physics.from_xml_string(model_string, assets=assets)
  task = LQRLevel(control_cost_coef, random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(physics, task, time_limit=time_limit,
                             **environment_kwargs)


def _make_body(body_id, stiffness_range, damping_range, random):
  """Returns an `etree.Element` defining a body.

  Args:
    body_id: Id of the created body.
    stiffness_range: A tuple of (stiffness_lower_bound, stiffness_upper_bound).
      The stiffness of the joint is drawn uniformly from this range.
    damping_range: A tuple of (damping_lower_bound, damping_upper_bound). The
      damping of the joint is drawn uniformly from this range.
    random: A `numpy.random.RandomState` instance.

  Returns:
    A new instance of `etree.Element`. A body element with two children: joint
    and geom.
  """
  body_name = 'body_{}'.format(body_id)
  joint_name = 'joint_{}'.format(body_id)
  geom_name = 'geom_{}'.format(body_id)

  body = etree.Element('body', name=body_name)
  body.set('pos', '.25 0 0')
  joint = etree.SubElement(body, 'joint', name=joint_name)
  body.append(etree.Element('geom', name=geom_name))
  joint.set('stiffness',
            str(random.uniform(stiffness_range[0], stiffness_range[1])))
  joint.set('damping',
            str(random.uniform(damping_range[0], damping_range[1])))
  return body


def _make_model(n_bodies,
                n_actuators,
                random,
                stiffness_range=(15, 25),
                damping_range=(0, 0)):
  """Returns an MJCF XML string defining a model of springs and dampers.

  Args:
    n_bodies: An integer, the number of bodies (DoFs) in the system.
    n_actuators: An integer, the number of actuated bodies.
    random: A `numpy.random.RandomState` instance.
    stiffness_range: A tuple containing minimum and maximum stiffness. Each
      joint's stiffness is sampled uniformly from this interval.
    damping_range: A tuple containing minimum and maximum damping. Each joint's
      damping is sampled uniformly from this interval.

  Returns:
    An MJCF string describing the linear system.

  Raises:
    ValueError: If the number of bodies or actuators is erroneous.
  """
  if n_bodies < 1 or n_actuators < 1:
    raise ValueError('At least 1 body and 1 actuator required.')
  if n_actuators > n_bodies:
    raise ValueError('At most 1 actuator per body.')

  file_path = os.path.join(os.path.dirname(__file__), 'lqr.xml')
  with resources.GetResourceAsFile(file_path) as xml_file:
    mjcf = xml_tools.parse(xml_file)
  parent = mjcf.find('./worldbody')
  actuator = etree.SubElement(mjcf.getroot(), 'actuator')
  tendon = etree.SubElement(mjcf.getroot(), 'tendon')

  for body in range(n_bodies):
    # Inserting body.
    child = _make_body(body, stiffness_range, damping_range, random)
    site_name = 'site_{}'.format(body)
    child.append(etree.Element('site', name=site_name))

    if body == 0:
      child.set('pos', '.25 0 .1')
    # Add actuators to the first n_actuators bodies.
    if body < n_actuators:
      # Adding actuator.
      joint_name = 'joint_{}'.format(body)
      motor_name = 'motor_{}'.format(body)
      child.find('joint').set('name', joint_name)
      actuator.append(etree.Element('motor', name=motor_name, joint=joint_name))

    # Add a tendon between consecutive bodies (for visualisation purposes only).
    if body < n_bodies - 1:
      child_site_name = 'site_{}'.format(body + 1)
      tendon_name = 'tendon_{}'.format(body)
      spatial = etree.SubElement(tendon, 'spatial', name=tendon_name)
      spatial.append(etree.Element('site', site=site_name))
      spatial.append(etree.Element('site', site=child_site_name))
    parent.append(child)
    parent = child

  return etree.tostring(mjcf, pretty_print=True)


class Physics(mujoco.Physics):
  """Physics simulation with additional features for the LQR domain."""

  def state_norm(self):
    """Returns the norm of the physics state."""
    return np.linalg.norm(self.state())


class LQRLevel(base.Task):
  """A Linear Quadratic Regulator `Task`."""

  _TERMINAL_TOL = 1e-6

  def __init__(self, control_cost_coef, random=None):
    """Initializes an LQR level with cost = sum(states^2) + c*sum(controls^2).

    Args:
      control_cost_coef: The coefficient of the control cost.
      random: Optional, either a `numpy.random.RandomState` instance, an
        integer seed for creating a new `RandomState`, or None to select a seed
        automatically (default).

    Raises:
      ValueError: If the control cost coefficient is not positive.
    """
    if control_cost_coef <= 0:
      raise ValueError('control_cost_coef must be positive.')

    self._control_cost_coef = control_cost_coef
    super(LQRLevel, self).__init__(random=random)

  @property
  def control_cost_coef(self):
    return self._control_cost_coef

  def initialize_episode(self, physics):
    """Random state sampled from a unit sphere."""
    ndof = physics.model.nq
    unit = self.random.randn(ndof)
    physics.data.qpos[:] = np.sqrt(2) * unit / np.linalg.norm(unit)
    super(LQRLevel, self).initialize_episode(physics)

  def get_observation(self, physics):
    """Returns an observation of the state."""
    obs = collections.OrderedDict()
    obs['position'] = physics.position()
    obs['velocity'] = physics.velocity()
    return obs

  def get_reward(self, physics):
    """Returns a quadratic state and control reward."""
    position = physics.position()
    state_cost = 0.5 * np.dot(position, position)
    control_signal = physics.control()
    control_l2_norm = 0.5 * np.dot(control_signal, control_signal)
    return 1 - (state_cost + control_l2_norm * self._control_cost_coef)

  def get_evaluation(self, physics):
    """Returns a sparse evaluation reward that is not used for learning."""
    return float(physics.state_norm() <= 0.01)

  def get_termination(self, physics):
    """Terminates when the state norm is smaller than epsilon."""
    if physics.state_norm() < self._TERMINAL_TOL:
      return 0.0
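
A quick sketch of driving one of these LQR levels, assuming the import path `local_dm_control_suite`; the zero action is a placeholder and the 100-step horizon is arbitrary (the default time limit is infinite):

import numpy as np
from local_dm_control_suite import lqr

env = lqr.lqr_2_1(random=0)  # 2 bodies, the first one actuated
time_step = env.reset()      # qpos sampled from a sphere of radius sqrt(2)
for _ in range(100):
  action = np.zeros(env.action_spec().shape)
  time_step = env.step(action)  # reward = 1 - (state cost + scaled control cost)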
26
Dreamer/local_dm_control_suite/lqr.xml
Executable file
@ -0,0 +1,26 @@
<mujoco model="LQR">
|
||||||
|
<include file="./common/skybox.xml"/>
|
||||||
|
<include file="./common/visual.xml"/>
|
||||||
|
<include file="./common/materials.xml"/>
|
||||||
|
|
||||||
|
<option timestep=".03"/>
|
||||||
|
|
||||||
|
<default>
|
||||||
|
<joint type="slide" axis="0 1 0"/>
|
||||||
|
<geom type="sphere" size=".1" material="self"/>
|
||||||
|
<site size=".01"/>
|
||||||
|
<tendon width=".02" material="self"/>
|
||||||
|
</default>
|
||||||
|
|
||||||
|
<option>
|
||||||
|
<flag constraint="disable"/>
|
||||||
|
</option>
|
||||||
|
|
||||||
|
<worldbody>
|
||||||
|
<light name="light" pos="0 0 2"/>
|
||||||
|
<camera name="cam0" pos="-1.428 -0.311 0.856" xyaxes="0.099 -0.995 0.000 0.350 0.035 0.936"/>
|
||||||
|
<camera name="cam1" pos="1.787 2.452 4.331" xyaxes="-1 0 0 0 -0.868 0.497"/>
|
||||||
|
<geom name="floor" size="4 1 .2" type="plane" material="grid"/>
|
||||||
|
<geom name="origin" pos="2 0 .05" size="2 .003 .05" type="box" rgba=".5 .5 .5 .5"/>
|
||||||
|
</worldbody>
|
||||||
|
</mujoco>
142
Dreamer/local_dm_control_suite/lqr_solver.py
Executable file
@ -0,0 +1,142 @@
# Copyright 2017 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

r"""Optimal policy for LQR levels.

The LQR control problem is described in
https://en.wikipedia.org/wiki/Linear-quadratic_regulator#Infinite-horizon.2C_discrete-time_LQR
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from absl import logging
from dm_control.mujoco import wrapper
import numpy as np
from six.moves import range

try:
  import scipy.linalg as sp  # pylint: disable=g-import-not-at-top
except ImportError:
  sp = None


def _solve_dare(a, b, q, r):
  """Solves the Discrete-time Algebraic Riccati Equation (DARE) by iteration.

  Algebraic Riccati Equation:
  ```none
  P_{t-1} = Q + A' * P_{t} * A -
            A' * P_{t} * B * (R + B' * P_{t} * B)^{-1} * B' * P_{t} * A
  ```

  Args:
    a: A 2 dimensional numpy array, transition matrix A.
    b: A 2 dimensional numpy array, control matrix B.
    q: A 2 dimensional numpy array, symmetric positive definite cost matrix.
    r: A 2 dimensional numpy array, symmetric positive definite cost matrix.

  Returns:
    A numpy array, a real symmetric matrix P which is the solution to DARE.

  Raises:
    RuntimeError: If the computed P matrix is not symmetric and
      positive-definite.
  """
  p = np.eye(len(a))
  for _ in range(1000000):
    a_p = a.T.dot(p)  # A' * P_t
    a_p_b = np.dot(a_p, b)  # A' * P_t * B
    # Algebraic Riccati Equation.
    p_next = q + np.dot(a_p, a) - a_p_b.dot(
        np.linalg.solve(b.T.dot(p.dot(b)) + r, a_p_b.T))
    p_next += p_next.T
    p_next *= .5
    if np.abs(p - p_next).max() < 1e-12:
      break
    p = p_next
  else:
    logging.warning('DARE solver did not converge')
  try:
    # Check that the result is symmetric and positive-definite.
    np.linalg.cholesky(p_next)
  except np.linalg.LinAlgError:
    raise RuntimeError('ARE solver failed: P matrix is not symmetric and '
                       'positive-definite.')
  return p_next


def solve(env):
  """Returns the optimal value and policy for the LQR problem.

  Args:
    env: An instance of `control.EnvironmentV2` with LQR level.

  Returns:
    p: A numpy array, the Hessian of the optimal total cost-to-go (value
      function at state x) is V(x) = .5 * x' * p * x.
    k: A numpy array which gives the optimal linear policy u = k * x.
    beta: The maximum eigenvalue of (a + b * k). Under the optimal policy, at
      timestep n the state tends to 0 like beta^n.

  Raises:
    RuntimeError: If the controlled system is unstable.
  """
  n = env.physics.model.nq  # number of DoFs
  m = env.physics.model.nu  # number of controls

  # Compute the mass matrix.
  mass = np.zeros((n, n))
  wrapper.mjbindings.mjlib.mj_fullM(env.physics.model.ptr, mass,
                                    env.physics.data.qM)

  # Compute input matrices a, b, q and r to the DARE solvers.
  # State transition matrix a.
  stiffness = np.diag(env.physics.model.jnt_stiffness.ravel())
  damping = np.diag(env.physics.model.dof_damping.ravel())
  dt = env.physics.model.opt.timestep

  j = np.linalg.solve(-mass, np.hstack((stiffness, damping)))
  a = np.eye(2 * n) + dt * np.vstack(
      (dt * j + np.hstack((np.zeros((n, n)), np.eye(n))), j))

  # Control transition matrix b.
  b = env.physics.data.actuator_moment.T
  bc = np.linalg.solve(mass, b)
  b = dt * np.vstack((dt * bc, bc))

  # State cost Hessian q.
  q = np.diag(np.hstack([np.ones(n), np.zeros(n)]))

  # Control cost Hessian r.
  r = env.task.control_cost_coef * np.eye(m)

  if sp:
    # Use scipy's faster DARE solver if available.
    solve_dare = sp.solve_discrete_are
  else:
    # Otherwise fall back on a slower internal implementation.
    solve_dare = _solve_dare

  # Solve the discrete algebraic Riccati equation.
  p = solve_dare(a, b, q, r)
  k = -np.linalg.solve(b.T.dot(p.dot(b)) + r, b.T.dot(p.dot(a)))

  # Under the optimal policy, the state tends to 0 like beta^n_timesteps.
  beta = np.abs(np.linalg.eigvals(a + b.dot(k))).max()
  if beta >= 1.0:
    raise RuntimeError('Controlled system is unstable.')
  return p, k, beta
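
A sketch of closing the loop with the solver above, assuming the import path `local_dm_control_suite` and that the state ordering is [qpos; qvel], matching the `a` matrix constructed in `solve` (this ordering is an assumption of the sketch, not stated by the file):

import numpy as np
from local_dm_control_suite import lqr, lqr_solver

env = lqr.lqr_2_1(random=0)
p, k, beta = lqr_solver.solve(env)  # value Hessian, gain matrix, contraction rate
time_step = env.reset()
for _ in range(100):
  x = np.hstack([env.physics.data.qpos, env.physics.data.qvel])
  time_step = env.step(k.dot(x))  # u = k x, the optimal linear policy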
|
290
Dreamer/local_dm_control_suite/manipulator.py
Executable file
290
Dreamer/local_dm_control_suite/manipulator.py
Executable file
@@ -0,0 +1,290 @@
# Copyright 2017 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Planar Manipulator domain."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections

from dm_control import mujoco
from dm_control.rl import control
from local_dm_control_suite import base
from local_dm_control_suite import common
from dm_control.utils import containers
from dm_control.utils import rewards
from dm_control.utils import xml_tools

from lxml import etree
import numpy as np

_CLOSE = .01  # (Meters) Distance below which a thing is considered close.
_CONTROL_TIMESTEP = .01  # (Seconds)
_TIME_LIMIT = 10  # (Seconds)
_P_IN_HAND = .1  # Probability of object-in-hand initial state
_P_IN_TARGET = .1  # Probability of object-in-target initial state
_ARM_JOINTS = ['arm_root', 'arm_shoulder', 'arm_elbow', 'arm_wrist',
               'finger', 'fingertip', 'thumb', 'thumbtip']
_ALL_PROPS = frozenset(['ball', 'target_ball', 'cup',
                        'peg', 'target_peg', 'slot'])

SUITE = containers.TaggedTasks()


def make_model(use_peg, insert):
  """Returns a tuple containing the model XML string and a dict of assets."""
  xml_string = common.read_model('manipulator.xml')
  parser = etree.XMLParser(remove_blank_text=True)
  mjcf = etree.XML(xml_string, parser)

  # Select the desired prop.
  if use_peg:
    required_props = ['peg', 'target_peg']
    if insert:
      required_props += ['slot']
  else:
    required_props = ['ball', 'target_ball']
    if insert:
      required_props += ['cup']

  # Remove unused props.
  for unused_prop in _ALL_PROPS.difference(required_props):
    prop = xml_tools.find_element(mjcf, 'body', unused_prop)
    prop.getparent().remove(prop)

  return etree.tostring(mjcf, pretty_print=True), common.ASSETS


@SUITE.add('benchmarking', 'hard')
def bring_ball(fully_observable=True, time_limit=_TIME_LIMIT, random=None,
               environment_kwargs=None):
  """Returns manipulator bring task with the ball prop."""
  use_peg = False
  insert = False
  physics = Physics.from_xml_string(*make_model(use_peg, insert))
  task = Bring(use_peg=use_peg, insert=insert,
               fully_observable=fully_observable, random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit,
      **environment_kwargs)


@SUITE.add('hard')
def bring_peg(fully_observable=True, time_limit=_TIME_LIMIT, random=None,
              environment_kwargs=None):
  """Returns manipulator bring task with the peg prop."""
  use_peg = True
  insert = False
  physics = Physics.from_xml_string(*make_model(use_peg, insert))
  task = Bring(use_peg=use_peg, insert=insert,
               fully_observable=fully_observable, random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit,
      **environment_kwargs)


@SUITE.add('hard')
def insert_ball(fully_observable=True, time_limit=_TIME_LIMIT, random=None,
                environment_kwargs=None):
  """Returns manipulator insert task with the ball prop."""
  use_peg = False
  insert = True
  physics = Physics.from_xml_string(*make_model(use_peg, insert))
  task = Bring(use_peg=use_peg, insert=insert,
               fully_observable=fully_observable, random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit,
      **environment_kwargs)


@SUITE.add('hard')
def insert_peg(fully_observable=True, time_limit=_TIME_LIMIT, random=None,
               environment_kwargs=None):
  """Returns manipulator insert task with the peg prop."""
  use_peg = True
  insert = True
  physics = Physics.from_xml_string(*make_model(use_peg, insert))
  task = Bring(use_peg=use_peg, insert=insert,
               fully_observable=fully_observable, random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit,
      **environment_kwargs)


class Physics(mujoco.Physics):
  """Physics with additional features for the Planar Manipulator domain."""

  def bounded_joint_pos(self, joint_names):
    """Returns joint positions as (sin, cos) values."""
    joint_pos = self.named.data.qpos[joint_names]
    return np.vstack([np.sin(joint_pos), np.cos(joint_pos)]).T

  def joint_vel(self, joint_names):
    """Returns joint velocities."""
    return self.named.data.qvel[joint_names]

  def body_2d_pose(self, body_names, orientation=True):
    """Returns positions and/or orientations of bodies."""
    if not isinstance(body_names, str):
      body_names = np.array(body_names).reshape(-1, 1)  # Broadcast indices.
    pos = self.named.data.xpos[body_names, ['x', 'z']]
    if orientation:
      ori = self.named.data.xquat[body_names, ['qw', 'qy']]
      return np.hstack([pos, ori])
    else:
      return pos

  def touch(self):
    return np.log1p(self.data.sensordata)

  def site_distance(self, site1, site2):
    site1_to_site2 = np.diff(self.named.data.site_xpos[[site2, site1]], axis=0)
    return np.linalg.norm(site1_to_site2)


class Bring(base.Task):
  """A Bring `Task`: bring the prop to the target."""

  def __init__(self, use_peg, insert, fully_observable, random=None):
    """Initialize an instance of the `Bring` task.

    Args:
      use_peg: A `bool`, whether to replace the ball prop with the peg prop.
      insert: A `bool`, whether to insert the prop in a receptacle.
      fully_observable: A `bool`, whether the observation should contain the
        position and velocity of the object being manipulated and the target
        location.
      random: Optional, either a `numpy.random.RandomState` instance, an
        integer seed for creating a new `RandomState`, or None to select a seed
        automatically (default).
    """
    self._use_peg = use_peg
    self._target = 'target_peg' if use_peg else 'target_ball'
    self._object = 'peg' if self._use_peg else 'ball'
    self._object_joints = ['_'.join([self._object, dim]) for dim in 'xzy']
    self._receptacle = 'slot' if self._use_peg else 'cup'
    self._insert = insert
    self._fully_observable = fully_observable
    super(Bring, self).__init__(random=random)

  def initialize_episode(self, physics):
    """Sets the state of the environment at the start of each episode."""
    # Local aliases
    choice = self.random.choice
    uniform = self.random.uniform
    model = physics.named.model
    data = physics.named.data

    # Find a collision-free random initial configuration.
    penetrating = True
    while penetrating:

      # Randomise angles of arm joints.
      is_limited = model.jnt_limited[_ARM_JOINTS].astype(bool)
      joint_range = model.jnt_range[_ARM_JOINTS]
      lower_limits = np.where(is_limited, joint_range[:, 0], -np.pi)
      upper_limits = np.where(is_limited, joint_range[:, 1], np.pi)
      angles = uniform(lower_limits, upper_limits)
      data.qpos[_ARM_JOINTS] = angles

      # Symmetrize hand.
      data.qpos['finger'] = data.qpos['thumb']

      # Randomise target location.
      target_x = uniform(-.4, .4)
      target_z = uniform(.1, .4)
      if self._insert:
        target_angle = uniform(-np.pi/3, np.pi/3)
        model.body_pos[self._receptacle, ['x', 'z']] = target_x, target_z
        model.body_quat[self._receptacle, ['qw', 'qy']] = [
            np.cos(target_angle/2), np.sin(target_angle/2)]
      else:
        target_angle = uniform(-np.pi, np.pi)

      model.body_pos[self._target, ['x', 'z']] = target_x, target_z
      model.body_quat[self._target, ['qw', 'qy']] = [
          np.cos(target_angle/2), np.sin(target_angle/2)]

      # Randomise object location.
      object_init_probs = [_P_IN_HAND, _P_IN_TARGET, 1-_P_IN_HAND-_P_IN_TARGET]
      init_type = choice(['in_hand', 'in_target', 'uniform'],
                         p=object_init_probs)
      if init_type == 'in_target':
        object_x = target_x
        object_z = target_z
        object_angle = target_angle
      elif init_type == 'in_hand':
        physics.after_reset()
        object_x = data.site_xpos['grasp', 'x']
        object_z = data.site_xpos['grasp', 'z']
        grasp_direction = data.site_xmat['grasp', ['xx', 'zx']]
        object_angle = np.pi-np.arctan2(grasp_direction[1], grasp_direction[0])
      else:
        object_x = uniform(-.5, .5)
        object_z = uniform(0, .7)
        object_angle = uniform(0, 2*np.pi)
        data.qvel[self._object + '_x'] = uniform(-5, 5)

      data.qpos[self._object_joints] = object_x, object_z, object_angle

      # Check for collisions.
      physics.after_reset()
      penetrating = physics.data.ncon > 0

    super(Bring, self).initialize_episode(physics)

  def get_observation(self, physics):
    """Returns either features or only sensors (to be used with pixels)."""
    obs = collections.OrderedDict()
    obs['arm_pos'] = physics.bounded_joint_pos(_ARM_JOINTS)
    obs['arm_vel'] = physics.joint_vel(_ARM_JOINTS)
    obs['touch'] = physics.touch()
    if self._fully_observable:
      obs['hand_pos'] = physics.body_2d_pose('hand')
      obs['object_pos'] = physics.body_2d_pose(self._object)
      obs['object_vel'] = physics.joint_vel(self._object_joints)
      obs['target_pos'] = physics.body_2d_pose(self._target)
    return obs

  def _is_close(self, distance):
    return rewards.tolerance(distance, (0, _CLOSE), _CLOSE*2)

  def _peg_reward(self, physics):
    """Returns a reward for bringing the peg prop to the target."""
    grasp = self._is_close(physics.site_distance('peg_grasp', 'grasp'))
    pinch = self._is_close(physics.site_distance('peg_pinch', 'pinch'))
    grasping = (grasp + pinch) / 2
    bring = self._is_close(physics.site_distance('peg', 'target_peg'))
    bring_tip = self._is_close(physics.site_distance('target_peg_tip',
                                                     'peg_tip'))
    bringing = (bring + bring_tip) / 2
    return max(bringing, grasping/3)

  def _ball_reward(self, physics):
    """Returns a reward for bringing the ball prop to the target."""
    return self._is_close(physics.site_distance('ball', 'target_ball'))

  def get_reward(self, physics):
    """Returns a reward to the agent."""
    if self._use_peg:
      return self._peg_reward(physics)
    else:
      return self._ball_reward(physics)
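
A minimal rollout sketch for this file, assuming `local_dm_control_suite` is importable from the repository root:

import numpy as np
from local_dm_control_suite import manipulator

env = manipulator.bring_ball(random=0)
spec = env.action_spec()
time_step = env.reset()
while not time_step.last():
  action = np.random.uniform(spec.minimum, spec.maximum, spec.shape)
  time_step = env.step(action)
print(time_step.reward)
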
211
Dreamer/local_dm_control_suite/manipulator.xml
Executable file
@@ -0,0 +1,211 @@
<mujoco model="planar manipulator">

  <include file="./common/visual.xml"/>
  <include file="./common/skybox.xml"/>
  <include file="./common/materials.xml"/>
  <asset>
    <texture name="background" builtin="flat" type="2d" mark="random" markrgb="1 1 1" width="800" height="800" rgb1=".2 .3 .4"/>
    <material name="background" texture="background" texrepeat="1 1" texuniform="true"/>
  </asset>

  <visual>
    <map shadowclip=".5"/>
    <quality shadowsize="2048"/>
  </visual>

  <option timestep="0.001" cone="elliptic"/>

  <default>
    <geom friction=".7" solimp="0.9 0.97 0.001" solref=".005 1"/>
    <joint solimplimit="0 0.99 0.01" solreflimit=".005 1"/>
    <general ctrllimited="true"/>
    <tendon width="0.01"/>
    <site size=".003 .003 .003" material="site" group="3"/>

    <default class="arm">
      <geom type="capsule" material="self" density="500"/>
      <joint type="hinge" pos="0 0 0" axis="0 -1 0" limited="true"/>
      <default class="hand">
        <joint damping=".5" range="-10 60"/>
        <geom size=".008"/>
        <site type="box" size=".018 .005 .005" pos=".022 0 -.002" euler="0 15 0" group="4"/>
        <default class="fingertip">
          <geom type="sphere" size=".008" material="effector"/>
          <joint damping=".01" stiffness=".01" range="-40 20"/>
          <site size=".012 .005 .008" pos=".003 0 .003" group="4" euler="0 0 0"/>
        </default>
      </default>
    </default>

    <default class="object">
      <geom material="self"/>
    </default>

    <default class="task">
      <site rgba="0 0 0 0"/>
    </default>

    <default class="obstacle">
      <geom material="decoration" friction="0"/>
    </default>

    <default class="ghost">
      <geom material="target" contype="0" conaffinity="0"/>
    </default>
  </default>

  <worldbody>
    <!-- Arena -->
    <light name="light" directional="true" diffuse=".6 .6 .6" pos="0 0 1" specular=".3 .3 .3"/>
    <geom name="floor" type="plane" pos="0 0 0" size=".4 .2 10" material="grid"/>
    <geom name="wall1" type="plane" pos="-.682843 0 .282843" size=".4 .2 10" material="grid" zaxis="1 0 1"/>
    <geom name="wall2" type="plane" pos=".682843 0 .282843" size=".4 .2 10" material="grid" zaxis="-1 0 1"/>
    <geom name="background" type="plane" pos="0 .2 .5" size="1 .5 10" material="background" zaxis="0 -1 0"/>
    <camera name="fixed" pos="0 -16 .4" xyaxes="1 0 0 0 0 1" fovy="4"/>

    <!-- Arm -->
    <geom name="arm_root" type="cylinder" fromto="0 -.022 .4 0 .022 .4" size=".024"
          material="decoration" contype="0" conaffinity="0"/>
    <body name="upper_arm" pos="0 0 .4" childclass="arm">
      <joint name="arm_root" damping="2" limited="false"/>
      <geom name="upper_arm" size=".02" fromto="0 0 0 0 0 .18"/>
      <body name="middle_arm" pos="0 0 .18" childclass="arm">
        <joint name="arm_shoulder" damping="1.5" range="-160 160"/>
        <geom name="middle_arm" size=".017" fromto="0 0 0 0 0 .15"/>
        <body name="lower_arm" pos="0 0 .15">
          <joint name="arm_elbow" damping="1" range="-160 160"/>
          <geom name="lower_arm" size=".014" fromto="0 0 0 0 0 .12"/>
          <body name="hand" pos="0 0 .12">
            <joint name="arm_wrist" damping=".5" range="-140 140"/>
            <geom name="hand" size=".011" fromto="0 0 0 0 0 .03"/>
            <geom name="palm1" fromto="0 0 .03 .03 0 .045" class="hand"/>
            <geom name="palm2" fromto="0 0 .03 -.03 0 .045" class="hand"/>
            <site name="grasp" pos="0 0 .065"/>
            <body name="pinch site" pos="0 0 .090">
              <site name="pinch"/>
              <inertial pos="0 0 0" mass="1e-6" diaginertia="1e-12 1e-12 1e-12"/>
              <camera name="hand" pos="0 -.3 0" xyaxes="1 0 0 0 0 1" mode="track"/>
            </body>
            <site name="palm_touch" type="box" group="4" size=".025 .005 .008" pos="0 0 .043"/>

            <body name="thumb" pos=".03 0 .045" euler="0 -90 0" childclass="hand">
              <joint name="thumb"/>
              <geom name="thumb1" fromto="0 0 0 .02 0 -.01" size=".007"/>
              <geom name="thumb2" fromto=".02 0 -.01 .04 0 -.01" size=".007"/>
              <site name="thumb_touch" group="4"/>
              <body name="thumbtip" pos=".05 0 -.01" childclass="fingertip">
                <joint name="thumbtip"/>
                <geom name="thumbtip1" pos="-.003 0 0"/>
                <geom name="thumbtip2" pos=".003 0 0"/>
                <site name="thumbtip_touch" group="4"/>
              </body>
            </body>

            <body name="finger" pos="-.03 0 .045" euler="0 90 180" childclass="hand">
              <joint name="finger"/>
              <geom name="finger1" fromto="0 0 0 .02 0 -.01" size=".007"/>
              <geom name="finger2" fromto=".02 0 -.01 .04 0 -.01" size=".007"/>
              <site name="finger_touch"/>
              <body name="fingertip" pos=".05 0 -.01" childclass="fingertip">
                <joint name="fingertip"/>
                <geom name="fingertip1" pos="-.003 0 0"/>
                <geom name="fingertip2" pos=".003 0 0"/>
                <site name="fingertip_touch"/>
              </body>
            </body>
          </body>
        </body>
      </body>
    </body>

    <!-- props -->
    <body name="ball" pos=".4 0 .4" childclass="object">
      <joint name="ball_x" type="slide" axis="1 0 0" ref=".4"/>
      <joint name="ball_z" type="slide" axis="0 0 1" ref=".4"/>
      <joint name="ball_y" type="hinge" axis="0 1 0"/>
      <geom name="ball" type="sphere" size=".022"/>
      <site name="ball" type="sphere"/>
    </body>

    <body name="peg" pos="-.4 0 .4" childclass="object">
      <joint name="peg_x" type="slide" axis="1 0 0" ref="-.4"/>
      <joint name="peg_z" type="slide" axis="0 0 1" ref=".4"/>
      <joint name="peg_y" type="hinge" axis="0 1 0"/>
      <geom name="blade" type="capsule" size=".005" fromto="0 0 -.013 0 0 -.113"/>
      <geom name="guard" type="capsule" size=".005" fromto="-.017 0 -.043 .017 0 -.043"/>
      <body name="pommel" pos="0 0 -.013">
        <geom name="pommel" type="sphere" size=".009"/>
      </body>
      <site name="peg" type="box" pos="0 0 -.063"/>
      <site name="peg_pinch" type="box" pos="0 0 -.025"/>
      <site name="peg_grasp" type="box" pos="0 0 0"/>
      <site name="peg_tip" type="box" pos="0 0 -.113"/>
    </body>

    <!-- receptacles -->
    <body name="slot" pos="-.405 0 .2" euler="0 20 0" childclass="obstacle">
      <geom name="slot_0" type="box" pos="-.0252 0 -.083" size=".0198 .01 .035"/>
      <geom name="slot_1" type="box" pos=" .0252 0 -.083" size=".0198 .01 .035"/>
      <geom name="slot_2" type="box" pos=" 0 0 -.138" size=".045 .01 .02"/>
      <site name="slot" type="box" pos="0 0 0"/>
      <site name="slot_end" type="box" pos="0 0 -.05"/>
    </body>

    <body name="cup" pos=".3 0 .4" euler="0 -15 0" childclass="obstacle">
      <geom name="cup_0" type="capsule" size=".008" fromto="-.03 0 .06 -.03 0 -.015"/>
      <geom name="cup_1" type="capsule" size=".008" fromto="-.03 0 -.015 0 0 -.04"/>
      <geom name="cup_2" type="capsule" size=".008" fromto="0 0 -.04 .03 0 -.015"/>
      <geom name="cup_3" type="capsule" size=".008" fromto=".03 0 -.015 .03 0 .06"/>
      <site name="cup" size=".005"/>
    </body>

    <!-- targets -->
    <body name="target_ball" pos=".4 .001 .4" childclass="ghost">
      <geom name="target_ball" type="sphere" size=".02"/>
      <site name="target_ball" type="sphere"/>
    </body>

    <body name="target_peg" pos="-.2 .001 .4" childclass="ghost">
      <geom name="target_blade" type="capsule" size=".005" fromto="0 0 -.013 0 0 -.113"/>
      <geom name="target_guard" type="capsule" size=".005" fromto="-.017 0 -.043 .017 0 -.043"/>
      <geom name="target_pommel" type="sphere" size=".009" pos="0 0 -.013"/>
      <site name="target_peg" type="box" pos="0 0 -.063"/>
      <site name="target_peg_pinch" type="box" pos="0 0 -.025"/>
      <site name="target_peg_grasp" type="box" pos="0 0 0"/>
      <site name="target_peg_tip" type="box" pos="0 0 -.113"/>
    </body>

  </worldbody>

  <tendon>
    <fixed name="grasp">
      <joint joint="thumb" coef=".5"/>
      <joint joint="finger" coef=".5"/>
    </fixed>
    <fixed name="coupling">
      <joint joint="thumb" coef="-.5"/>
      <joint joint="finger" coef=".5"/>
    </fixed>
  </tendon>

  <equality>
    <tendon name="coupling" tendon1="coupling" solimp="0.95 0.99 0.001" solref=".005 .5"/>
  </equality>

  <sensor>
    <touch name="palm_touch" site="palm_touch"/>
    <touch name="finger_touch" site="finger_touch"/>
    <touch name="thumb_touch" site="thumb_touch"/>
    <touch name="fingertip_touch" site="fingertip_touch"/>
    <touch name="thumbtip_touch" site="thumbtip_touch"/>
  </sensor>

  <actuator>
    <motor name="root" joint="arm_root" ctrlrange="-1 1" gear="12"/>
    <motor name="shoulder" joint="arm_shoulder" ctrlrange="-1 1" gear="8"/>
    <motor name="elbow" joint="arm_elbow" ctrlrange="-1 1" gear="4"/>
    <motor name="wrist" joint="arm_wrist" ctrlrange="-1 1" gear="2"/>
    <motor name="grasp" tendon="grasp" ctrlrange="-1 1" gear="2"/>
  </actuator>

</mujoco>
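
The raw XML above (with all props still present) can also be loaded directly for inspection; a sketch, assuming the suite's shared assets are available via `common.ASSETS`:

from dm_control import mujoco
from local_dm_control_suite import common

physics = mujoco.Physics.from_xml_string(
    common.read_model('manipulator.xml'), common.ASSETS)
print(physics.model.ntendon)                  # 2: the 'grasp' and 'coupling' tendons.
print(physics.named.data.site_xpos['grasp'])  # Grasp site in world coordinates.
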
114
Dreamer/local_dm_control_suite/pendulum.py
Executable file
@@ -0,0 +1,114 @@
# Copyright 2017 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Pendulum domain."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections

from dm_control import mujoco
from dm_control.rl import control
from local_dm_control_suite import base
from local_dm_control_suite import common
from dm_control.utils import containers
from dm_control.utils import rewards
import numpy as np


_DEFAULT_TIME_LIMIT = 20
_ANGLE_BOUND = 8
_COSINE_BOUND = np.cos(np.deg2rad(_ANGLE_BOUND))
SUITE = containers.TaggedTasks()


def get_model_and_assets():
  """Returns a tuple containing the model XML string and a dict of assets."""
  return common.read_model('pendulum.xml'), common.ASSETS


@SUITE.add('benchmarking')
def swingup(time_limit=_DEFAULT_TIME_LIMIT, random=None,
            environment_kwargs=None):
  """Returns pendulum swingup task."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = SwingUp(random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, time_limit=time_limit, **environment_kwargs)


class Physics(mujoco.Physics):
  """Physics simulation with additional features for the Pendulum domain."""

  def pole_vertical(self):
    """Returns vertical (z) component of pole frame."""
    return self.named.data.xmat['pole', 'zz']

  def angular_velocity(self):
    """Returns the angular velocity of the pole."""
    return self.named.data.qvel['hinge'].copy()

  def pole_orientation(self):
    """Returns both horizontal and vertical components of pole frame."""
    return self.named.data.xmat['pole', ['zz', 'xz']]


class SwingUp(base.Task):
  """A Pendulum `Task` to swing up and balance the pole."""

  def __init__(self, random=None):
    """Initialize an instance of `Pendulum`.

    Args:
      random: Optional, either a `numpy.random.RandomState` instance, an
        integer seed for creating a new `RandomState`, or None to select a seed
        automatically (default).
    """
    super(SwingUp, self).__init__(random=random)

  def initialize_episode(self, physics):
    """Sets the state of the environment at the start of each episode.

    The pole is set to a random angle in [-pi, pi).

    Args:
      physics: An instance of `Physics`.
    """
    physics.named.data.qpos['hinge'] = self.random.uniform(-np.pi, np.pi)
    super(SwingUp, self).initialize_episode(physics)

  def get_observation(self, physics):
    """Returns an observation.

    Observations are states concatenating pole orientation and angular
    velocity.

    Args:
      physics: An instance of `Physics`, Pendulum physics.

    Returns:
      A `dict` of observation.
    """
    obs = collections.OrderedDict()
    obs['orientation'] = physics.pole_orientation()
    obs['velocity'] = physics.angular_velocity()
    return obs

  def get_reward(self, physics):
    return rewards.tolerance(physics.pole_vertical(), (_COSINE_BOUND, 1))
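
Note that the reward is sparse: `rewards.tolerance` with no margin returns 1 inside the bounds and 0 outside, so reward is only given while the pole is within 8 degrees of vertical. A quick check, assuming dm_control is installed:

import numpy as np
from dm_control.utils import rewards

cosine_bound = np.cos(np.deg2rad(8))               # Same as _COSINE_BOUND above.
print(rewards.tolerance(1.0, (cosine_bound, 1)))   # 1.0: pole upright.
print(rewards.tolerance(0.0, (cosine_bound, 1)))   # 0.0: pole horizontal.
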
26
Dreamer/local_dm_control_suite/pendulum.xml
Executable file
@@ -0,0 +1,26 @@
<mujoco model="pendulum">
  <include file="./common/visual.xml"/>
  <include file="./common/skybox.xml"/>
  <include file="./common/materials.xml"/>

  <option timestep="0.02">
    <flag contact="disable" energy="enable"/>
  </option>

  <worldbody>
    <light name="light" pos="0 0 2"/>
    <geom name="floor" size="2 2 .2" type="plane" material="grid"/>
    <camera name="fixed" pos="0 -1.5 2" xyaxes="1 0 0 0 1 1"/>
    <camera name="lookat" mode="targetbodycom" target="pole" pos="0 -2 1"/>
    <body name="pole" pos="0 0 .6">
      <joint name="hinge" type="hinge" axis="0 1 0" damping="0.1"/>
      <geom name="base" material="decoration" type="cylinder" fromto="0 -.03 0 0 .03 0" size="0.021" mass="0"/>
      <geom name="pole" material="self" type="capsule" fromto="0 0 0 0 0 0.5" size="0.02" mass="0"/>
      <geom name="mass" material="effector" type="sphere" pos="0 0 0.5" size="0.05" mass="1"/>
    </body>
  </worldbody>

  <actuator>
    <motor name="torque" joint="hinge" gear="1" ctrlrange="-1 1" ctrllimited="true"/>
  </actuator>
</mujoco>
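
This model makes swing-up necessary rather than mere stabilization: the motor's peak torque (gear 1, ctrlrange [-1, 1]) is far below the gravitational torque on the 1 kg mass at the end of the 0.5 m pole, so the agent has to pump energy over several swings. Rough statics, assuming g = 9.81 m/s^2:

mass, lever, g = 1.0, 0.5, 9.81
gravity_torque = mass * g * lever          # ~4.9 N*m with the pole horizontal.
max_motor_torque = 1.0 * 1.0               # gear * maximum control.
print(max_motor_torque < gravity_torque)   # True: cannot lift the pole from horizontal.
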
130
Dreamer/local_dm_control_suite/point_mass.py
Executable file
@@ -0,0 +1,130 @@
# Copyright 2017 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Point-mass domain."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections

from dm_control import mujoco
from dm_control.rl import control
from local_dm_control_suite import base
from local_dm_control_suite import common
from dm_control.suite.utils import randomizers
from dm_control.utils import containers
from dm_control.utils import rewards
import numpy as np

_DEFAULT_TIME_LIMIT = 20
SUITE = containers.TaggedTasks()


def get_model_and_assets():
  """Returns a tuple containing the model XML string and a dict of assets."""
  return common.read_model('point_mass.xml'), common.ASSETS


@SUITE.add('benchmarking', 'easy')
def easy(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
  """Returns the easy point_mass task."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = PointMass(randomize_gains=False, random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, time_limit=time_limit, **environment_kwargs)


@SUITE.add()
def hard(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
  """Returns the hard point_mass task."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = PointMass(randomize_gains=True, random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, time_limit=time_limit, **environment_kwargs)


class Physics(mujoco.Physics):
  """Physics for the point_mass domain."""

  def mass_to_target(self):
    """Returns the vector from mass to target in global coordinates."""
    return (self.named.data.geom_xpos['target'] -
            self.named.data.geom_xpos['pointmass'])

  def mass_to_target_dist(self):
    """Returns the distance from mass to the target."""
    return np.linalg.norm(self.mass_to_target())


class PointMass(base.Task):
  """A point_mass `Task` to reach target with smooth reward."""

  def __init__(self, randomize_gains, random=None):
    """Initialize an instance of `PointMass`.

    Args:
      randomize_gains: A `bool`, whether to randomize the actuator gains.
      random: Optional, either a `numpy.random.RandomState` instance, an
        integer seed for creating a new `RandomState`, or None to select a seed
        automatically (default).
    """
    self._randomize_gains = randomize_gains
    super(PointMass, self).__init__(random=random)

  def initialize_episode(self, physics):
    """Sets the state of the environment at the start of each episode.

    If _randomize_gains is True, the relationship between the controls and
    the joints is randomized, so that each control actuates a random linear
    combination of joints.

    Args:
      physics: An instance of `mujoco.Physics`.
    """
    randomizers.randomize_limited_and_rotational_joints(physics, self.random)
    if self._randomize_gains:
      dir1 = self.random.randn(2)
      dir1 /= np.linalg.norm(dir1)
      # Find another actuation direction that is not 'too parallel' to dir1.
      parallel = True
      while parallel:
        dir2 = self.random.randn(2)
        dir2 /= np.linalg.norm(dir2)
        parallel = abs(np.dot(dir1, dir2)) > 0.9
      physics.model.wrap_prm[[0, 1]] = dir1
      physics.model.wrap_prm[[2, 3]] = dir2
    super(PointMass, self).initialize_episode(physics)

  def get_observation(self, physics):
    """Returns an observation of the state."""
    obs = collections.OrderedDict()
    obs['position'] = physics.position()
    obs['velocity'] = physics.velocity()
    return obs

  def get_reward(self, physics):
    """Returns a reward to the agent."""
    target_size = physics.named.model.geom_size['target', 0]
    near_target = rewards.tolerance(physics.mass_to_target_dist(),
                                    bounds=(0, target_size), margin=target_size)
    control_reward = rewards.tolerance(physics.control(), margin=1,
                                       value_at_margin=0,
                                       sigmoid='quadratic').mean()
    small_control = (control_reward + 4) / 5
    return near_target * small_control
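
A short rollout sketch, under the same repository-layout assumption as the other suite files:

import numpy as np
from local_dm_control_suite import point_mass

env = point_mass.easy(random=0)
time_step = env.reset()
for _ in range(5):
  time_step = env.step(np.zeros(env.action_spec().shape))
  print(time_step.reward)
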
49
Dreamer/local_dm_control_suite/point_mass.xml
Executable file
@@ -0,0 +1,49 @@
<mujoco model="planar point mass">
  <include file="./common/skybox.xml"/>
  <include file="./common/visual.xml"/>
  <include file="./common/materials.xml"/>

  <option timestep="0.02">
    <flag contact="disable"/>
  </option>

  <default>
    <joint type="hinge" axis="0 0 1" limited="true" range="-.29 .29" damping="1"/>
    <motor gear=".1" ctrlrange="-1 1" ctrllimited="true"/>
  </default>

  <worldbody>
    <light name="light" pos="0 0 1"/>
    <camera name="fixed" pos="0 0 .75" quat="1 0 0 0"/>
    <geom name="ground" type="plane" pos="0 0 0" size=".3 .3 .1" material="grid"/>
    <geom name="wall_x" type="plane" pos="-.3 0 .02" zaxis="1 0 0" size=".02 .3 .02" material="decoration"/>
    <geom name="wall_y" type="plane" pos="0 -.3 .02" zaxis="0 1 0" size=".3 .02 .02" material="decoration"/>
    <geom name="wall_neg_x" type="plane" pos=".3 0 .02" zaxis="-1 0 0" size=".02 .3 .02" material="decoration"/>
    <geom name="wall_neg_y" type="plane" pos="0 .3 .02" zaxis="0 -1 0" size=".3 .02 .02" material="decoration"/>

    <body name="pointmass" pos="0 0 .01">
      <camera name="cam0" pos="0 -0.3 0.3" xyaxes="1 0 0 0 0.7 0.7"/>
      <joint name="root_x" type="slide" pos="0 0 0" axis="1 0 0"/>
      <joint name="root_y" type="slide" pos="0 0 0" axis="0 1 0"/>
      <geom name="pointmass" type="sphere" size=".01" material="self" mass=".3"/>
    </body>

    <geom name="target" pos="0 0 .01" material="target" type="sphere" size=".015"/>
  </worldbody>

  <tendon>
    <fixed name="t1">
      <joint joint="root_x" coef="1"/>
      <joint joint="root_y" coef="0"/>
    </fixed>
    <fixed name="t2">
      <joint joint="root_x" coef="0"/>
      <joint joint="root_y" coef="1"/>
    </fixed>
  </tendon>

  <actuator>
    <motor name="t1" tendon="t1"/>
    <motor name="t2" tendon="t2"/>
  </actuator>
</mujoco>
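
The per-joint tendon coefficients above are what `PointMass.initialize_episode` overwrites through `physics.model.wrap_prm` when `randomize_gains=True`; with the stock XML they form the identity mapping. A quick look (this assumes `wrap_prm` stores the four coefficients in declaration order):

from dm_control import mujoco
from local_dm_control_suite import common

physics = mujoco.Physics.from_xml_string(
    common.read_model('point_mass.xml'), common.ASSETS)
print(physics.model.wrap_prm[:4])  # Expected [1. 0. 0. 1.]: t1 -> x, t2 -> y.
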
480
Dreamer/local_dm_control_suite/quadruped.py
Executable file
@@ -0,0 +1,480 @@
|
|||||||
|
# Copyright 2019 The dm_control Authors.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
"""Quadruped Domain."""
|
||||||
|
|
||||||
|
from __future__ import absolute_import
|
||||||
|
from __future__ import division
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import collections
|
||||||
|
|
||||||
|
from dm_control import mujoco
|
||||||
|
from dm_control.mujoco.wrapper import mjbindings
|
||||||
|
from dm_control.rl import control
|
||||||
|
from local_dm_control_suite import base
|
||||||
|
from local_dm_control_suite import common
|
||||||
|
from dm_control.utils import containers
|
||||||
|
from dm_control.utils import rewards
|
||||||
|
from dm_control.utils import xml_tools
|
||||||
|
|
||||||
|
from lxml import etree
|
||||||
|
import numpy as np
|
||||||
|
from scipy import ndimage
|
||||||
|
|
||||||
|
enums = mjbindings.enums
|
||||||
|
mjlib = mjbindings.mjlib
|
||||||
|
|
||||||
|
|
||||||
|
_DEFAULT_TIME_LIMIT = 20
|
||||||
|
_CONTROL_TIMESTEP = .02
|
||||||
|
|
||||||
|
# Horizontal speeds above which the move reward is 1.
|
||||||
|
_RUN_SPEED = 5
|
||||||
|
_WALK_SPEED = 0.5
|
||||||
|
|
||||||
|
# Constants related to terrain generation.
|
||||||
|
_HEIGHTFIELD_ID = 0
|
||||||
|
_TERRAIN_SMOOTHNESS = 0.15 # 0.0: maximally bumpy; 1.0: completely smooth.
|
||||||
|
_TERRAIN_BUMP_SCALE = 2 # Spatial scale of terrain bumps (in meters).
|
||||||
|
|
||||||
|
# Named model elements.
|
||||||
|
_TOES = ['toe_front_left', 'toe_back_left', 'toe_back_right', 'toe_front_right']
|
||||||
|
_WALLS = ['wall_px', 'wall_py', 'wall_nx', 'wall_ny']
|
||||||
|
|
||||||
|
SUITE = containers.TaggedTasks()
|
||||||
|
|
||||||
|
|
||||||
|
def make_model(floor_size=None, terrain=False, rangefinders=False,
|
||||||
|
walls_and_ball=False):
|
||||||
|
"""Returns the model XML string."""
|
||||||
|
xml_string = common.read_model('quadruped.xml')
|
||||||
|
parser = etree.XMLParser(remove_blank_text=True)
|
||||||
|
mjcf = etree.XML(xml_string, parser)
|
||||||
|
|
||||||
|
# Set floor size.
|
||||||
|
if floor_size is not None:
|
||||||
|
floor_geom = mjcf.find('.//geom[@name={!r}]'.format('floor'))
|
||||||
|
floor_geom.attrib['size'] = '{} {} .5'.format(floor_size, floor_size)
|
||||||
|
|
||||||
|
# Remove walls, ball and target.
|
||||||
|
if not walls_and_ball:
|
||||||
|
for wall in _WALLS:
|
||||||
|
wall_geom = xml_tools.find_element(mjcf, 'geom', wall)
|
||||||
|
wall_geom.getparent().remove(wall_geom)
|
||||||
|
|
||||||
|
# Remove ball.
|
||||||
|
ball_body = xml_tools.find_element(mjcf, 'body', 'ball')
|
||||||
|
ball_body.getparent().remove(ball_body)
|
||||||
|
|
||||||
|
# Remove target.
|
||||||
|
target_site = xml_tools.find_element(mjcf, 'site', 'target')
|
||||||
|
target_site.getparent().remove(target_site)
|
||||||
|
|
||||||
|
# Remove terrain.
|
||||||
|
if not terrain:
|
||||||
|
terrain_geom = xml_tools.find_element(mjcf, 'geom', 'terrain')
|
||||||
|
terrain_geom.getparent().remove(terrain_geom)
|
||||||
|
|
||||||
|
# Remove rangefinders if they're not used, as range computations can be
|
||||||
|
# expensive, especially in a scene with heightfields.
|
||||||
|
if not rangefinders:
|
||||||
|
rangefinder_sensors = mjcf.findall('.//rangefinder')
|
||||||
|
for rf in rangefinder_sensors:
|
||||||
|
rf.getparent().remove(rf)
|
||||||
|
|
||||||
|
return etree.tostring(mjcf, pretty_print=True)
|
||||||
|
|
||||||
|
|
||||||
|
@SUITE.add()
|
||||||
|
def walk(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
|
||||||
|
"""Returns the Walk task."""
|
||||||
|
xml_string = make_model(floor_size=_DEFAULT_TIME_LIMIT * _WALK_SPEED)
|
||||||
|
physics = Physics.from_xml_string(xml_string, common.ASSETS)
|
||||||
|
task = Move(desired_speed=_WALK_SPEED, random=random)
|
||||||
|
environment_kwargs = environment_kwargs or {}
|
||||||
|
return control.Environment(physics, task, time_limit=time_limit,
|
||||||
|
control_timestep=_CONTROL_TIMESTEP,
|
||||||
|
**environment_kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
@SUITE.add()
|
||||||
|
def run(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
|
||||||
|
"""Returns the Run task."""
|
||||||
|
xml_string = make_model(floor_size=_DEFAULT_TIME_LIMIT * _RUN_SPEED)
|
||||||
|
physics = Physics.from_xml_string(xml_string, common.ASSETS)
|
||||||
|
task = Move(desired_speed=_RUN_SPEED, random=random)
|
||||||
|
environment_kwargs = environment_kwargs or {}
|
||||||
|
return control.Environment(physics, task, time_limit=time_limit,
|
||||||
|
control_timestep=_CONTROL_TIMESTEP,
|
||||||
|
**environment_kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
@SUITE.add()
|
||||||
|
def escape(time_limit=_DEFAULT_TIME_LIMIT, random=None,
|
||||||
|
environment_kwargs=None):
|
||||||
|
"""Returns the Escape task."""
|
||||||
|
xml_string = make_model(floor_size=40, terrain=True, rangefinders=True)
|
||||||
|
physics = Physics.from_xml_string(xml_string, common.ASSETS)
|
||||||
|
task = Escape(random=random)
|
||||||
|
environment_kwargs = environment_kwargs or {}
|
||||||
|
return control.Environment(physics, task, time_limit=time_limit,
|
||||||
|
control_timestep=_CONTROL_TIMESTEP,
|
||||||
|
**environment_kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
@SUITE.add()
|
||||||
|
def fetch(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
|
||||||
|
"""Returns the Fetch task."""
|
||||||
|
xml_string = make_model(walls_and_ball=True)
|
||||||
|
physics = Physics.from_xml_string(xml_string, common.ASSETS)
|
||||||
|
task = Fetch(random=random)
|
||||||
|
environment_kwargs = environment_kwargs or {}
|
||||||
|
return control.Environment(physics, task, time_limit=time_limit,
|
||||||
|
control_timestep=_CONTROL_TIMESTEP,
|
||||||
|
**environment_kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
class Physics(mujoco.Physics):
|
||||||
|
"""Physics simulation with additional features for the Quadruped domain."""
|
||||||
|
|
||||||
|
def _reload_from_data(self, data):
|
||||||
|
super(Physics, self)._reload_from_data(data)
|
||||||
|
# Clear cached sensor names when the physics is reloaded.
|
||||||
|
self._sensor_types_to_names = {}
|
||||||
|
self._hinge_names = []
|
||||||
|
|
||||||
|
def _get_sensor_names(self, *sensor_types):
|
||||||
|
try:
|
||||||
|
sensor_names = self._sensor_types_to_names[sensor_types]
|
||||||
|
except KeyError:
|
||||||
|
[sensor_ids] = np.where(np.in1d(self.model.sensor_type, sensor_types))
|
||||||
|
sensor_names = [self.model.id2name(s_id, 'sensor') for s_id in sensor_ids]
|
||||||
|
self._sensor_types_to_names[sensor_types] = sensor_names
|
||||||
|
return sensor_names
|
||||||
|
|
||||||
|
def torso_upright(self):
|
||||||
|
"""Returns the dot-product of the torso z-axis and the global z-axis."""
|
||||||
|
return np.asarray(self.named.data.xmat['torso', 'zz'])
|
||||||
|
|
||||||
|
def torso_velocity(self):
|
||||||
|
"""Returns the velocity of the torso, in the local frame."""
|
||||||
|
return self.named.data.sensordata['velocimeter'].copy()
|
||||||
|
|
||||||
|
def egocentric_state(self):
|
||||||
|
"""Returns the state without global orientation or position."""
|
||||||
|
if not self._hinge_names:
|
||||||
|
[hinge_ids] = np.nonzero(self.model.jnt_type ==
|
||||||
|
enums.mjtJoint.mjJNT_HINGE)
|
||||||
|
self._hinge_names = [self.model.id2name(j_id, 'joint')
|
||||||
|
for j_id in hinge_ids]
|
||||||
|
return np.hstack((self.named.data.qpos[self._hinge_names],
|
||||||
|
self.named.data.qvel[self._hinge_names],
|
||||||
|
self.data.act))
|
||||||
|
|
||||||
|
def toe_positions(self):
|
||||||
|
"""Returns toe positions in egocentric frame."""
|
||||||
|
torso_frame = self.named.data.xmat['torso'].reshape(3, 3)
|
||||||
|
torso_pos = self.named.data.xpos['torso']
|
||||||
|
torso_to_toe = self.named.data.xpos[_TOES] - torso_pos
|
||||||
|
return torso_to_toe.dot(torso_frame)
|
||||||
|
|
||||||
|
def force_torque(self):
|
||||||
|
"""Returns scaled force/torque sensor readings at the toes."""
|
||||||
|
force_torque_sensors = self._get_sensor_names(enums.mjtSensor.mjSENS_FORCE,
|
||||||
|
enums.mjtSensor.mjSENS_TORQUE)
|
||||||
|
return np.arcsinh(self.named.data.sensordata[force_torque_sensors])
|
||||||
|
|
||||||
|
def imu(self):
|
||||||
|
"""Returns IMU-like sensor readings."""
|
||||||
|
imu_sensors = self._get_sensor_names(enums.mjtSensor.mjSENS_GYRO,
|
||||||
|
enums.mjtSensor.mjSENS_ACCELEROMETER)
|
||||||
|
return self.named.data.sensordata[imu_sensors]
|
||||||
|
|
||||||
|
def rangefinder(self):
|
||||||
|
"""Returns scaled rangefinder sensor readings."""
|
||||||
|
rf_sensors = self._get_sensor_names(enums.mjtSensor.mjSENS_RANGEFINDER)
|
||||||
|
rf_readings = self.named.data.sensordata[rf_sensors]
|
||||||
|
no_intersection = -1.0
|
||||||
|
return np.where(rf_readings == no_intersection, 1.0, np.tanh(rf_readings))
|
||||||
|
|
||||||
|
def origin_distance(self):
|
||||||
|
"""Returns the distance from the origin to the workspace."""
|
||||||
|
return np.asarray(np.linalg.norm(self.named.data.site_xpos['workspace']))
|
||||||
|
|
||||||
|
def origin(self):
|
||||||
|
"""Returns origin position in the torso frame."""
|
||||||
|
torso_frame = self.named.data.xmat['torso'].reshape(3, 3)
|
||||||
|
torso_pos = self.named.data.xpos['torso']
|
||||||
|
return -torso_pos.dot(torso_frame)
|
||||||
|
|
||||||
|
def ball_state(self):
|
||||||
|
"""Returns ball position and velocity relative to the torso frame."""
|
||||||
|
data = self.named.data
|
||||||
|
torso_frame = data.xmat['torso'].reshape(3, 3)
|
||||||
|
ball_rel_pos = data.xpos['ball'] - data.xpos['torso']
|
||||||
|
ball_rel_vel = data.qvel['ball_root'][:3] - data.qvel['root'][:3]
|
||||||
|
ball_rot_vel = data.qvel['ball_root'][3:]
|
||||||
|
ball_state = np.vstack((ball_rel_pos, ball_rel_vel, ball_rot_vel))
|
||||||
|
return ball_state.dot(torso_frame).ravel()
|
||||||
|
|
||||||
|
def target_position(self):
|
||||||
|
"""Returns target position in torso frame."""
|
||||||
|
torso_frame = self.named.data.xmat['torso'].reshape(3, 3)
|
||||||
|
torso_pos = self.named.data.xpos['torso']
|
||||||
|
torso_to_target = self.named.data.site_xpos['target'] - torso_pos
|
||||||
|
return torso_to_target.dot(torso_frame)
|
||||||
|
|
||||||
|
def ball_to_target_distance(self):
|
||||||
|
"""Returns horizontal distance from the ball to the target."""
|
||||||
|
ball_to_target = (self.named.data.site_xpos['target'] -
|
||||||
|
self.named.data.xpos['ball'])
|
||||||
|
return np.linalg.norm(ball_to_target[:2])
|
||||||
|
|
||||||
|
def self_to_ball_distance(self):
|
||||||
|
"""Returns horizontal distance from the quadruped workspace to the ball."""
|
||||||
|
self_to_ball = (self.named.data.site_xpos['workspace']
|
||||||
|
-self.named.data.xpos['ball'])
|
||||||
|
return np.linalg.norm(self_to_ball[:2])
|
||||||
|
|
||||||
|
|
||||||
|
def _find_non_contacting_height(physics, orientation, x_pos=0.0, y_pos=0.0):
|
||||||
|
"""Find a height with no contacts given a body orientation.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
physics: An instance of `Physics`.
|
||||||
|
orientation: A quaternion.
|
||||||
|
x_pos: A float. Position along global x-axis.
|
||||||
|
y_pos: A float. Position along global y-axis.
|
||||||
|
Raises:
|
||||||
|
RuntimeError: If a non-contacting configuration has not been found after
|
||||||
|
10,000 attempts.
|
||||||
|
"""
|
||||||
|
z_pos = 0.0 # Start embedded in the floor.
|
||||||
|
num_contacts = 1
|
||||||
|
num_attempts = 0
|
||||||
|
# Move up in 1cm increments until no contacts.
|
||||||
|
while num_contacts > 0:
|
||||||
|
try:
|
||||||
|
with physics.reset_context():
|
||||||
|
physics.named.data.qpos['root'][:3] = x_pos, y_pos, z_pos
|
||||||
|
physics.named.data.qpos['root'][3:] = orientation
|
||||||
|
except control.PhysicsError:
|
||||||
|
# We may encounter a PhysicsError here due to filling the contact
|
||||||
|
# buffer, in which case we simply increment the height and continue.
|
||||||
|
pass
|
||||||
|
num_contacts = physics.data.ncon
|
||||||
|
z_pos += 0.01
|
||||||
|
num_attempts += 1
|
||||||
|
if num_attempts > 10000:
|
||||||
|
raise RuntimeError('Failed to find a non-contacting configuration.')
|
||||||
|
|
||||||
|
|
||||||
|
def _common_observations(physics):
|
||||||
|
"""Returns the observations common to all tasks."""
|
||||||
|
obs = collections.OrderedDict()
|
||||||
|
obs['egocentric_state'] = physics.egocentric_state()
|
||||||
|
obs['torso_velocity'] = physics.torso_velocity()
|
||||||
|
obs['torso_upright'] = physics.torso_upright()
|
||||||
|
obs['imu'] = physics.imu()
|
||||||
|
obs['force_torque'] = physics.force_torque()
|
||||||
|
return obs
|
||||||
|
|
||||||
|
|
||||||
|
def _upright_reward(physics, deviation_angle=0):
|
||||||
|
"""Returns a reward proportional to how upright the torso is.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
physics: an instance of `Physics`.
|
||||||
|
deviation_angle: A float, in degrees. The reward is 0 when the torso is
|
||||||
|
exactly upside-down and 1 when the torso's z-axis is less than
|
||||||
|
`deviation_angle` away from the global z-axis.
|
||||||
|
"""
|
||||||
|
deviation = np.cos(np.deg2rad(deviation_angle))
|
||||||
|
return rewards.tolerance(
|
||||||
|
physics.torso_upright(),
|
||||||
|
bounds=(deviation, float('inf')),
|
||||||
|
sigmoid='linear',
|
||||||
|
margin=1 + deviation,
|
||||||
|
value_at_margin=0)
|
||||||
|
|
||||||
|
|
||||||
|
class Move(base.Task):
|
||||||
|
"""A quadruped task solved by moving forward at a designated speed."""
|
||||||
|
|
||||||
|
def __init__(self, desired_speed, random=None):
|
||||||
|
"""Initializes an instance of `Move`.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
desired_speed: A float. If this value is zero, reward is given simply
|
||||||
|
for standing upright. Otherwise this specifies the horizontal velocity
|
||||||
|
at which the velocity-dependent reward component is maximized.
|
||||||
|
random: Optional, either a `numpy.random.RandomState` instance, an
|
||||||
|
integer seed for creating a new `RandomState`, or None to select a seed
|
||||||
|
automatically (default).
|
||||||
|
"""
|
||||||
|
self._desired_speed = desired_speed
|
||||||
|
super(Move, self).__init__(random=random)
|
||||||
|
|
||||||
|
def initialize_episode(self, physics):
|
||||||
|
"""Sets the state of the environment at the start of each episode.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
physics: An instance of `Physics`.
|
||||||
|
|
||||||
|
"""
|
||||||
|
# Initial configuration.
|
||||||
|
orientation = self.random.randn(4)
|
||||||
|
orientation /= np.linalg.norm(orientation)
|
||||||
|
_find_non_contacting_height(physics, orientation)
|
||||||
|
super(Move, self).initialize_episode(physics)
|
||||||
|
|
||||||
|
def get_observation(self, physics):
|
||||||
|
"""Returns an observation to the agent."""
|
||||||
|
return _common_observations(physics)
|
||||||
|
|
||||||
|
def get_reward(self, physics):
|
||||||
|
"""Returns a reward to the agent."""
|
||||||
|
|
||||||
|
# Move reward term.
|
||||||
|
move_reward = rewards.tolerance(
|
||||||
|
physics.torso_velocity()[0],
|
||||||
|
bounds=(self._desired_speed, float('inf')),
|
||||||
|
margin=self._desired_speed,
|
||||||
|
value_at_margin=0.5,
|
||||||
|
sigmoid='linear')
|
||||||
|
|
||||||
|
return _upright_reward(physics) * move_reward
|
||||||
|
|
||||||
|
|
||||||
|
class Escape(base.Task):
|
||||||
|
"""A quadruped task solved by escaping a bowl-shaped terrain."""
|
||||||
|
|
||||||
|
def initialize_episode(self, physics):
|
||||||
|
"""Sets the state of the environment at the start of each episode.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
physics: An instance of `Physics`.
|
||||||
|
|
||||||
|
"""
|
||||||
|
# Get heightfield resolution, assert that it is square.
|
||||||
|
res = physics.model.hfield_nrow[_HEIGHTFIELD_ID]
|
||||||
|
assert res == physics.model.hfield_ncol[_HEIGHTFIELD_ID]
|
||||||
|
# Sinusoidal bowl shape.
|
||||||
|
row_grid, col_grid = np.ogrid[-1:1:res*1j, -1:1:res*1j]
|
||||||
|
radius = np.clip(np.sqrt(col_grid**2 + row_grid**2), .04, 1)
|
||||||
|
bowl_shape = .5 - np.cos(2*np.pi*radius)/2
|
||||||
|
# Random smooth bumps.
|
||||||
|
terrain_size = 2 * physics.model.hfield_size[_HEIGHTFIELD_ID, 0]
|
||||||
|
bump_res = int(terrain_size / _TERRAIN_BUMP_SCALE)
|
||||||
|
bumps = self.random.uniform(_TERRAIN_SMOOTHNESS, 1, (bump_res, bump_res))
|
||||||
|
smooth_bumps = ndimage.zoom(bumps, res / float(bump_res))
|
||||||
|
# Terrain is elementwise product.
|
||||||
|
terrain = bowl_shape * smooth_bumps
|
||||||
|
start_idx = physics.model.hfield_adr[_HEIGHTFIELD_ID]
|
||||||
|
physics.model.hfield_data[start_idx:start_idx+res**2] = terrain.ravel()
|
||||||
|
super(Escape, self).initialize_episode(physics)
|
||||||
|
|
||||||
|
# If we have a rendering context, we need to re-upload the modified
|
||||||
|
# heightfield data.
|
||||||
|
if physics.contexts:
|
||||||
|
with physics.contexts.gl.make_current() as ctx:
|
||||||
|
ctx.call(mjlib.mjr_uploadHField,
|
||||||
|
physics.model.ptr,
|
||||||
|
physics.contexts.mujoco.ptr,
|
||||||
|
_HEIGHTFIELD_ID)
|
||||||
|
|
||||||
|
# Initial configuration.
|
||||||
|
orientation = self.random.randn(4)
|
||||||
|
orientation /= np.linalg.norm(orientation)
|
||||||
|
_find_non_contacting_height(physics, orientation)
|
||||||
|
|
||||||
|
def get_observation(self, physics):
|
||||||
|
"""Returns an observation to the agent."""
|
||||||
|
obs = _common_observations(physics)
|
||||||
|
obs['origin'] = physics.origin()
|
||||||
|
obs['rangefinder'] = physics.rangefinder()
|
||||||
|
return obs
|
||||||
|
|
||||||
|
def get_reward(self, physics):
|
||||||
|
"""Returns a reward to the agent."""
|
||||||
|
|
||||||
|
# Escape reward term.
|
||||||
|
terrain_size = physics.model.hfield_size[_HEIGHTFIELD_ID, 0]
|
||||||
|
escape_reward = rewards.tolerance(
|
||||||
|
physics.origin_distance(),
|
||||||
|
bounds=(terrain_size, float('inf')),
|
||||||
|
margin=terrain_size,
|
||||||
|
value_at_margin=0,
|
||||||
|
sigmoid='linear')
|
||||||
|
|
||||||
|
return _upright_reward(physics, deviation_angle=20) * escape_reward
|
||||||
|
|
||||||
|
|
||||||
|
class Fetch(base.Task):
|
||||||
|
"""A quadruped task solved by bringing a ball to the origin."""
|
||||||
|
|
||||||
|
def initialize_episode(self, physics):
|
||||||
|
"""Sets the state of the environment at the start of each episode.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
physics: An instance of `Physics`.
|
||||||
|
|
||||||
|
"""
|
||||||
|
# Initial configuration, random azimuth and horizontal position.
|
||||||
|
azimuth = self.random.uniform(0, 2*np.pi)
|
||||||
|
orientation = np.array((np.cos(azimuth/2), 0, 0, np.sin(azimuth/2)))
|
||||||
|
spawn_radius = 0.9 * physics.named.model.geom_size['floor', 0]
|
||||||
|
x_pos, y_pos = self.random.uniform(-spawn_radius, spawn_radius, size=(2,))
|
||||||
|
_find_non_contacting_height(physics, orientation, x_pos, y_pos)
|
||||||
|
|
||||||
|
# Initial ball state.
|
||||||
|
physics.named.data.qpos['ball_root'][:2] = self.random.uniform(
|
||||||
|
-spawn_radius, spawn_radius, size=(2,))
|
||||||
|
physics.named.data.qpos['ball_root'][2] = 2
|
||||||
|
physics.named.data.qvel['ball_root'][:2] = 5*self.random.randn(2)
|
||||||
|
super(Fetch, self).initialize_episode(physics)
|
||||||
|
|
||||||
|
def get_observation(self, physics):
|
||||||
|
"""Returns an observation to the agent."""
|
||||||
|
obs = _common_observations(physics)
|
||||||
|
obs['ball_state'] = physics.ball_state()
|
||||||
|
obs['target_position'] = physics.target_position()
|
||||||
|
return obs
|
||||||
|
|
||||||
|
def get_reward(self, physics):
|
||||||
|
"""Returns a reward to the agent."""
|
||||||
|
|
||||||
|
# Reward for moving close to the ball.
|
||||||
|
arena_radius = physics.named.model.geom_size['floor', 0] * np.sqrt(2)
|
||||||
|
workspace_radius = physics.named.model.site_size['workspace', 0]
|
||||||
|
ball_radius = physics.named.model.geom_size['ball', 0]
|
||||||
|
reach_reward = rewards.tolerance(
|
||||||
|
physics.self_to_ball_distance(),
|
||||||
|
bounds=(0, workspace_radius+ball_radius),
|
||||||
|
sigmoid='linear',
|
||||||
|
margin=arena_radius, value_at_margin=0)
|
||||||
|
|
||||||
|
# Reward for bringing the ball to the target.
|
||||||
|
target_radius = physics.named.model.site_size['target', 0]
|
||||||
|
fetch_reward = rewards.tolerance(
|
||||||
|
physics.ball_to_target_distance(),
|
||||||
|
bounds=(0, target_radius),
|
||||||
|
sigmoid='linear',
|
||||||
|
margin=arena_radius, value_at_margin=0)
|
||||||
|
|
||||||
|
reach_then_fetch = reach_reward * (0.5 + 0.5*fetch_reward)
|
||||||
|
|
||||||
|
return _upright_reward(physics) * reach_then_fetch
|
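
Each of the reward functions above is shaped by `rewards.tolerance`, which maps a scalar into [0, 1]. A standalone sketch of how the `Move` term behaves, using a hypothetical desired speed of 0.5 (in the real tasks this value is set by the task constructor):

# Sketch only, not part of quadruped.py. Velocities at or above the
# desired speed score 1; below it the reward decays linearly, reaching
# value_at_margin (0.5) at zero velocity and 0 at -desired_speed.
from dm_control.utils import rewards
import numpy as np

desired_speed = 0.5  # Hypothetical stand-in for self._desired_speed.
for velocity in np.linspace(-0.5, 1.0, 7):
  r = rewards.tolerance(velocity,
                        bounds=(desired_speed, float('inf')),
                        margin=desired_speed,
                        value_at_margin=0.5,
                        sigmoid='linear')
  print('velocity=%+.2f  move_reward=%.2f' % (velocity, r))
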
329
Dreamer/local_dm_control_suite/quadruped.xml
Executable file
@ -0,0 +1,329 @@
<mujoco model="quadruped">

  <include file="./common/skybox.xml"/>
  <include file="./common/visual.xml"/>
  <include file="./common/materials.xml"/>

  <visual>
    <rgba rangefinder="1 1 0.1 0.1"/>
    <map znear=".005" zfar="20"/>
  </visual>

  <asset>
    <hfield name="terrain" ncol="201" nrow="201" size="30 30 5 .1"/>
  </asset>

  <option timestep=".005"/>

  <default>
    <geom solimp=".9 .99 .003" solref=".01 1"/>
    <default class="body">
      <geom type="capsule" size=".08" condim="1" material="self" density="500"/>
      <joint type="hinge" damping="30" armature=".01"
             limited="true" solimplimit="0 .99 .01"/>
      <default class="hip">
        <default class="yaw">
          <joint axis="0 0 1" range="-50 50"/>
        </default>
        <default class="pitch">
          <joint axis="0 1 0" range="-20 60"/>
        </default>
        <geom fromto="0 0 0 .3 0 .11"/>
      </default>
      <default class="knee">
        <joint axis="0 1 0" range="-60 50"/>
        <geom size=".065" fromto="0 0 0 .25 0 -.25"/>
      </default>
      <default class="ankle">
        <joint axis="0 1 0" range="-45 55"/>
        <geom size=".055" fromto="0 0 0 0 0 -.25"/>
      </default>
      <default class="toe">
        <geom type="sphere" size=".08" material="effector" friction="1.5"/>
        <site type="sphere" size=".084" material="site" group="4"/>
      </default>
    </default>
    <default class="rangefinder">
      <site type="capsule" size=".005 .1" material="site" group="4"/>
    </default>
    <default class="wall">
      <geom type="plane" material="decoration"/>
    </default>

    <default class="coupling">
      <equality solimp="0.95 0.99 0.01" solref=".005 .5"/>
    </default>

    <general ctrllimited="true" gainprm="1000" biasprm="0 -1000" biastype="affine" dyntype="filter" dynprm=".1"/>
    <default class="yaw_act">
      <general ctrlrange="-1 1"/>
    </default>
    <default class="lift_act">
      <general ctrlrange="-1 1.1"/>
    </default>
    <default class="extend_act">
      <general ctrlrange="-.8 .8"/>
    </default>
  </default>

  <asset>
    <texture name="ball" builtin="checker" mark="cross" width="151" height="151"
             rgb1="0.1 0.1 0.1" rgb2="0.9 0.9 0.9" markrgb="1 1 1"/>
    <material name="ball" texture="ball"/>
  </asset>

  <worldbody>
    <geom name="floor" type="plane" size="15 15 .5" material="grid"/>
    <geom name="wall_px" class="wall" pos="-15.7 0 .7" zaxis="1 0 1" size="1 15 .5"/>
    <geom name="wall_py" class="wall" pos="0 -15.7 .7" zaxis="0 1 1" size="15 1 .5"/>
    <geom name="wall_nx" class="wall" pos="15.7 0 .7" zaxis="-1 0 1" size="1 15 .5"/>
    <geom name="wall_ny" class="wall" pos="0 15.7 .7" zaxis="0 -1 1" size="15 1 .5"/>
    <site name="target" type="cylinder" size=".4 .06" pos="0 0 .05" material="target"/>

    <geom name="terrain" type="hfield" hfield="terrain" rgba=".2 .3 .4 1" pos="0 0 -.01"/>

    <camera name="global" pos="-10 10 10" xyaxes="-1 -1 0 1 0 1" mode="trackcom"/>
    <body name="torso" childclass="body" pos="0 0 .57">
      <freejoint name="root"/>

      <camera name="x" pos="-1.7 0 1" xyaxes="0 -1 0 .75 0 1" mode="trackcom"/>
      <camera name="y" pos="0 4 2" xyaxes="-1 0 0 0 -.5 1" mode="trackcom"/>
      <camera name="egocentric" pos=".3 0 .11" xyaxes="0 -1 0 .4 0 1" fovy="60"/>
      <light name="light" pos="0 0 4" mode="trackcom"/>

      <geom name="eye_r" type="cylinder" size=".05" fromto=".1 -.07 .12 .31 -.07 .08" mass="0"/>
      <site name="pupil_r" type="sphere" size=".033" pos=".3 -.07 .08" zaxis="1 0 0" material="eye"/>
      <geom name="eye_l" type="cylinder" size=".05" fromto=".1 .07 .12 .31 .07 .08" mass="0"/>
      <site name="pupil_l" type="sphere" size=".033" pos=".3 .07 .08" zaxis="1 0 0" material="eye"/>
      <site name="workspace" type="sphere" size=".3 .3 .3" material="site" pos=".8 0 -.2" group="3"/>

      <site name="rf_00" class="rangefinder" fromto=".41 -.02 .11 .34 0 .115"/>
      <site name="rf_01" class="rangefinder" fromto=".41 -.01 .11 .34 0 .115"/>
      <site name="rf_02" class="rangefinder" fromto=".41 0 .11 .34 0 .115"/>
      <site name="rf_03" class="rangefinder" fromto=".41 .01 .11 .34 0 .115"/>
      <site name="rf_04" class="rangefinder" fromto=".41 .02 .11 .34 0 .115"/>
      <site name="rf_10" class="rangefinder" fromto=".41 -.02 .1 .36 0 .11"/>
<site name="rf_11" class="rangefinder" fromto=".41 -.02 .1 .36 0 .11"/>
|
||||||
|
<site name="rf_12" class="rangefinder" fromto=".41 0 .1 .36 0 .11"/>
|
||||||
|
<site name="rf_13" class="rangefinder" fromto=".41 .01 .1 .36 0 .11"/>
|
||||||
|
<site name="rf_14" class="rangefinder" fromto=".41 .02 .1 .36 0 .11"/>
|
||||||
|
<site name="rf_20" class="rangefinder" fromto=".41 -.02 .09 .38 0 .105"/>
|
||||||
|
<site name="rf_21" class="rangefinder" fromto=".41 -.01 .09 .38 0 .105"/>
|
||||||
|
<site name="rf_22" class="rangefinder" fromto=".41 0 .09 .38 0 .105"/>
|
||||||
|
<site name="rf_23" class="rangefinder" fromto=".41 .01 .09 .38 0 .105"/>
|
||||||
|
<site name="rf_24" class="rangefinder" fromto=".41 .02 .09 .38 0 .105"/>
|
||||||
|
<site name="rf_30" class="rangefinder" fromto=".41 -.02 .08 .4 0 .1"/>
|
||||||
|
<site name="rf_31" class="rangefinder" fromto=".41 -.01 .08 .4 0 .1"/>
|
||||||
|
<site name="rf_32" class="rangefinder" fromto=".41 0 .08 .4 0 .1"/>
|
||||||
|
<site name="rf_33" class="rangefinder" fromto=".41 .01 .08 .4 0 .1"/>
|
||||||
|
<site name="rf_34" class="rangefinder" fromto=".41 .02 .08 .4 0 .1"/>
|
||||||
|
|
||||||
|
<geom name="torso" type="ellipsoid" size=".3 .27 .2" density="1000"/>
|
||||||
|
<site name="torso_touch" type="box" size=".26 .26 .26" rgba="0 0 1 0"/>
|
||||||
|
<site name="torso" size=".05" rgba="1 0 0 1" />
|
||||||
|
|
||||||
|
<body name="hip_front_left" pos=".2 .2 0" euler="0 0 45" childclass="hip">
|
||||||
|
<joint name="yaw_front_left" class="yaw"/>
|
||||||
|
<joint name="pitch_front_left" class="pitch"/>
|
||||||
|
<geom name="thigh_front_left"/>
|
||||||
|
<body name="knee_front_left" pos=".3 0 .11" childclass="knee">
|
||||||
|
<joint name="knee_front_left"/>
|
||||||
|
<geom name="shin_front_left"/>
|
||||||
|
<body name="ankle_front_left" pos=".25 0 -.25" childclass="ankle">
|
||||||
|
<joint name="ankle_front_left"/>
|
||||||
|
<geom name="foot_front_left"/>
|
||||||
|
<body name="toe_front_left" pos="0 0 -.3" childclass="toe">
|
||||||
|
<geom name="toe_front_left"/>
|
||||||
|
<site name="toe_front_left"/>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
|
||||||
|
<body name="hip_front_right" pos=".2 -.2 0" euler="0 0 -45" childclass="hip">
|
||||||
|
<joint name="yaw_front_right" class="yaw"/>
|
||||||
|
<joint name="pitch_front_right" class="pitch"/>
|
||||||
|
<geom name="thigh_front_right"/>
|
||||||
|
<body name="knee_front_right" pos=".3 0 .11" childclass="knee">
|
||||||
|
<joint name="knee_front_right"/>
|
||||||
|
<geom name="shin_front_right"/>
|
||||||
|
<body name="ankle_front_right" pos=".25 0 -.25" childclass="ankle">
|
||||||
|
<joint name="ankle_front_right"/>
|
||||||
|
<geom name="foot_front_right"/>
|
||||||
|
<body name="toe_front_right" pos="0 0 -.3" childclass="toe">
|
||||||
|
<geom name="toe_front_right"/>
|
||||||
|
<site name="toe_front_right"/>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
|
||||||
|
<body name="hip_back_right" pos="-.2 -.2 0" euler="0 0 -135" childclass="hip">
|
||||||
|
<joint name="yaw_back_right" class="yaw"/>
|
||||||
|
<joint name="pitch_back_right" class="pitch"/>
|
||||||
|
<geom name="thigh_back_right"/>
|
||||||
|
<body name="knee_back_right" pos=".3 0 .11" childclass="knee">
|
||||||
|
<joint name="knee_back_right"/>
|
||||||
|
<geom name="shin_back_right"/>
|
||||||
|
<body name="ankle_back_right" pos=".25 0 -.25" childclass="ankle">
|
||||||
|
<joint name="ankle_back_right"/>
|
||||||
|
<geom name="foot_back_right"/>
|
||||||
|
<body name="toe_back_right" pos="0 0 -.3" childclass="toe">
|
||||||
|
<geom name="toe_back_right"/>
|
||||||
|
<site name="toe_back_right"/>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
|
||||||
|
<body name="hip_back_left" pos="-.2 .2 0" euler="0 0 135" childclass="hip">
|
||||||
|
<joint name="yaw_back_left" class="yaw"/>
|
||||||
|
<joint name="pitch_back_left" class="pitch"/>
|
||||||
|
<geom name="thigh_back_left"/>
|
||||||
|
<body name="knee_back_left" pos=".3 0 .11" childclass="knee">
|
||||||
|
<joint name="knee_back_left"/>
|
||||||
|
<geom name="shin_back_left"/>
|
||||||
|
<body name="ankle_back_left" pos=".25 0 -.25" childclass="ankle">
|
||||||
|
<joint name="ankle_back_left"/>
|
||||||
|
<geom name="foot_back_left"/>
|
||||||
|
<body name="toe_back_left" pos="0 0 -.3" childclass="toe">
|
||||||
|
<geom name="toe_back_left"/>
|
||||||
|
<site name="toe_back_left"/>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
</body>
|
||||||
|
|
||||||
|
<body name="ball" pos="0 0 3">
|
||||||
|
<freejoint name="ball_root"/>
|
||||||
|
<geom name="ball" size=".15" material="ball" priority="1" condim="6" friction=".7 .005 .005"
|
||||||
|
solref="-10000 -30"/>
|
||||||
|
<light name="ball_light" pos="0 0 4" mode="trackcom"/>
|
||||||
|
</body>
|
||||||
|
|
||||||
|
</worldbody>
|
||||||
|
|
||||||
|
<tendon>
|
||||||
|
<fixed name="coupling_front_left">
|
||||||
|
<joint joint="pitch_front_left" coef=".333"/>
|
||||||
|
<joint joint="knee_front_left" coef=".333"/>
|
||||||
|
<joint joint="ankle_front_left" coef=".333"/>
|
||||||
|
</fixed>
|
||||||
|
<fixed name="coupling_front_right">
|
||||||
|
<joint joint="pitch_front_right" coef=".333"/>
|
||||||
|
<joint joint="knee_front_right" coef=".333"/>
|
||||||
|
<joint joint="ankle_front_right" coef=".333"/>
|
||||||
|
</fixed>
|
||||||
|
<fixed name="coupling_back_right">
|
||||||
|
<joint joint="pitch_back_right" coef=".333"/>
|
||||||
|
<joint joint="knee_back_right" coef=".333"/>
|
||||||
|
<joint joint="ankle_back_right" coef=".333"/>
|
||||||
|
</fixed>
|
||||||
|
<fixed name="coupling_back_left">
|
||||||
|
<joint joint="pitch_back_left" coef=".333"/>
|
||||||
|
<joint joint="knee_back_left" coef=".333"/>
|
||||||
|
<joint joint="ankle_back_left" coef=".333"/>
|
||||||
|
</fixed>
|
||||||
|
|
||||||
|
<fixed name="extend_front_left">
|
||||||
|
<joint joint="pitch_front_left" coef=".25"/>
|
||||||
|
<joint joint="knee_front_left" coef="-.5"/>
|
||||||
|
<joint joint="ankle_front_left" coef=".25"/>
|
||||||
|
</fixed>
|
||||||
|
<fixed name="lift_front_left">
|
||||||
|
<joint joint="pitch_front_left" coef=".5"/>
|
||||||
|
<joint joint="ankle_front_left" coef="-.5"/>
|
||||||
|
</fixed>
|
||||||
|
|
||||||
|
<fixed name="extend_front_right">
|
||||||
|
<joint joint="pitch_front_right" coef=".25"/>
|
||||||
|
<joint joint="knee_front_right" coef="-.5"/>
|
||||||
|
<joint joint="ankle_front_right" coef=".25"/>
|
||||||
|
</fixed>
|
||||||
|
<fixed name="lift_front_right">
|
||||||
|
<joint joint="pitch_front_right" coef=".5"/>
|
||||||
|
<joint joint="ankle_front_right" coef="-.5"/>
|
||||||
|
</fixed>
|
||||||
|
|
||||||
|
<fixed name="extend_back_right">
|
||||||
|
<joint joint="pitch_back_right" coef=".25"/>
|
||||||
|
<joint joint="knee_back_right" coef="-.5"/>
|
||||||
|
<joint joint="ankle_back_right" coef=".25"/>
|
||||||
|
</fixed>
|
||||||
|
<fixed name="lift_back_right">
|
||||||
|
<joint joint="pitch_back_right" coef=".5"/>
|
||||||
|
<joint joint="ankle_back_right" coef="-.5"/>
|
||||||
|
</fixed>
|
||||||
|
|
||||||
|
<fixed name="extend_back_left">
|
||||||
|
<joint joint="pitch_back_left" coef=".25"/>
|
||||||
|
<joint joint="knee_back_left" coef="-.5"/>
|
||||||
|
<joint joint="ankle_back_left" coef=".25"/>
|
||||||
|
</fixed>
|
||||||
|
<fixed name="lift_back_left">
|
||||||
|
<joint joint="pitch_back_left" coef=".5"/>
|
||||||
|
<joint joint="ankle_back_left" coef="-.5"/>
|
||||||
|
</fixed>
|
||||||
|
</tendon>
|
||||||
|
|
||||||
|
<equality>
|
||||||
|
<tendon name="coupling_front_left" tendon1="coupling_front_left" class="coupling"/>
|
||||||
|
<tendon name="coupling_front_right" tendon1="coupling_front_right" class="coupling"/>
|
||||||
|
<tendon name="coupling_back_right" tendon1="coupling_back_right" class="coupling"/>
|
||||||
|
<tendon name="coupling_back_left" tendon1="coupling_back_left" class="coupling"/>
|
||||||
|
</equality>
|
||||||
|
|
||||||
|
<actuator>
|
||||||
|
<general name="yaw_front_left" class="yaw_act" joint="yaw_front_left"/>
|
||||||
|
<general name="lift_front_left" class="lift_act" tendon="lift_front_left"/>
|
||||||
|
<general name="extend_front_left" class="extend_act" tendon="extend_front_left"/>
|
||||||
|
<general name="yaw_front_right" class="yaw_act" joint="yaw_front_right"/>
|
||||||
|
<general name="lift_front_right" class="lift_act" tendon="lift_front_right"/>
|
||||||
|
<general name="extend_front_right" class="extend_act" tendon="extend_front_right"/>
|
||||||
|
<general name="yaw_back_right" class="yaw_act" joint="yaw_back_right"/>
|
||||||
|
<general name="lift_back_right" class="lift_act" tendon="lift_back_right"/>
|
||||||
|
<general name="extend_back_right" class="extend_act" tendon="extend_back_right"/>
|
||||||
|
<general name="yaw_back_left" class="yaw_act" joint="yaw_back_left"/>
|
||||||
|
<general name="lift_back_left" class="lift_act" tendon="lift_back_left"/>
|
||||||
|
<general name="extend_back_left" class="extend_act" tendon="extend_back_left"/>
|
||||||
|
</actuator>
|
||||||
|
|
||||||
|
<sensor>
|
||||||
|
<accelerometer name="imu_accel" site="torso"/>
|
||||||
|
<gyro name="imu_gyro" site="torso"/>
|
||||||
|
<velocimeter name="velocimeter" site="torso"/>
|
||||||
|
<force name="force_toe_front_left" site="toe_front_left"/>
|
||||||
|
<force name="force_toe_front_right" site="toe_front_right"/>
|
||||||
|
<force name="force_toe_back_right" site="toe_back_right"/>
|
||||||
|
<force name="force_toe_back_left" site="toe_back_left"/>
|
||||||
|
<torque name="torque_toe_front_left" site="toe_front_left"/>
|
||||||
|
<torque name="torque_toe_front_right" site="toe_front_right"/>
|
||||||
|
<torque name="torque_toe_back_right" site="toe_back_right"/>
|
||||||
|
<torque name="torque_toe_back_left" site="toe_back_left"/>
|
||||||
|
<subtreecom name="center_of_mass" body="torso"/>
|
||||||
|
<rangefinder name="rf_00" site="rf_00"/>
|
||||||
|
<rangefinder name="rf_01" site="rf_01"/>
|
||||||
|
<rangefinder name="rf_02" site="rf_02"/>
|
||||||
|
<rangefinder name="rf_03" site="rf_03"/>
|
||||||
|
<rangefinder name="rf_04" site="rf_04"/>
|
||||||
|
<rangefinder name="rf_10" site="rf_10"/>
|
||||||
|
<rangefinder name="rf_11" site="rf_11"/>
|
||||||
|
<rangefinder name="rf_12" site="rf_12"/>
|
||||||
|
<rangefinder name="rf_13" site="rf_13"/>
|
||||||
|
<rangefinder name="rf_14" site="rf_14"/>
|
||||||
|
<rangefinder name="rf_20" site="rf_20"/>
|
||||||
|
<rangefinder name="rf_21" site="rf_21"/>
|
||||||
|
<rangefinder name="rf_22" site="rf_22"/>
|
||||||
|
<rangefinder name="rf_23" site="rf_23"/>
|
||||||
|
<rangefinder name="rf_24" site="rf_24"/>
|
||||||
|
<rangefinder name="rf_30" site="rf_30"/>
|
||||||
|
<rangefinder name="rf_31" site="rf_31"/>
|
||||||
|
<rangefinder name="rf_32" site="rf_32"/>
|
||||||
|
<rangefinder name="rf_33" site="rf_33"/>
|
||||||
|
<rangefinder name="rf_34" site="rf_34"/>
|
||||||
|
</sensor>
|
||||||
|
|
||||||
|
</mujoco>
|
||||||
|
|
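
For reference, the model above can be loaded on its own with dm_control's MuJoCo bindings. A minimal sketch, assuming the XML and its ./common includes sit at the repository path below (the path is an assumption, not something this commit asserts):

# Sketch only. Loads quadruped.xml standalone and inspects the
# heightfield that Escape.initialize_episode rewrites every episode.
from dm_control import mujoco

physics = mujoco.Physics.from_xml_path(
    'Dreamer/local_dm_control_suite/quadruped.xml')
# Matches the ncol="201" nrow="201" hfield declared in <asset>.
print(physics.model.hfield_nrow[0], physics.model.hfield_ncol[0])
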
116
Dreamer/local_dm_control_suite/reacher.py
Executable file
@ -0,0 +1,116 @@
# Copyright 2017 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Reacher domain."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections

from dm_control import mujoco
from dm_control.rl import control
from local_dm_control_suite import base
from local_dm_control_suite import common
from dm_control.suite.utils import randomizers
from dm_control.utils import containers
from dm_control.utils import rewards
import numpy as np

SUITE = containers.TaggedTasks()
_DEFAULT_TIME_LIMIT = 20
_BIG_TARGET = .05
_SMALL_TARGET = .015


def get_model_and_assets():
  """Returns a tuple containing the model XML string and a dict of assets."""
  return common.read_model('reacher.xml'), common.ASSETS


@SUITE.add('benchmarking', 'easy')
def easy(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
  """Returns reacher with sparse reward with 5e-2 tol and randomized target."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = Reacher(target_size=_BIG_TARGET, random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, time_limit=time_limit, **environment_kwargs)


@SUITE.add('benchmarking')
def hard(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
  """Returns reacher with sparse reward with 1e-2 tol and randomized target."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = Reacher(target_size=_SMALL_TARGET, random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, time_limit=time_limit, **environment_kwargs)


class Physics(mujoco.Physics):
  """Physics simulation with additional features for the Reacher domain."""

  def finger_to_target(self):
    """Returns the vector from finger to target in global coordinates."""
    return (self.named.data.geom_xpos['target', :2] -
            self.named.data.geom_xpos['finger', :2])

  def finger_to_target_dist(self):
    """Returns the signed distance between the finger and target surface."""
    return np.linalg.norm(self.finger_to_target())


class Reacher(base.Task):
  """A reacher `Task` to reach the target."""

  def __init__(self, target_size, random=None):
    """Initialize an instance of `Reacher`.

    Args:
      target_size: A `float`, tolerance to determine whether finger reached the
          target.
      random: Optional, either a `numpy.random.RandomState` instance, an
        integer seed for creating a new `RandomState`, or None to select a seed
        automatically (default).
    """
    self._target_size = target_size
    super(Reacher, self).__init__(random=random)

  def initialize_episode(self, physics):
    """Sets the state of the environment at the start of each episode."""
    physics.named.model.geom_size['target', 0] = self._target_size
    randomizers.randomize_limited_and_rotational_joints(physics, self.random)

    # Randomize target position.
    angle = self.random.uniform(0, 2 * np.pi)
    radius = self.random.uniform(.05, .20)
    physics.named.model.geom_pos['target', 'x'] = radius * np.sin(angle)
    physics.named.model.geom_pos['target', 'y'] = radius * np.cos(angle)

    super(Reacher, self).initialize_episode(physics)

  def get_observation(self, physics):
    """Returns an observation of the state and the target position."""
    obs = collections.OrderedDict()
    obs['position'] = physics.position()
    obs['to_target'] = physics.finger_to_target()
    obs['velocity'] = physics.velocity()
    return obs

  def get_reward(self, physics):
    radii = physics.named.model.geom_size[['target', 'finger'], 0].sum()
    return rewards.tolerance(physics.finger_to_target_dist(), (0, radii))
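
A minimal usage sketch for the file above (not part of it): build the `easy` task and step it with uniform random actions.

# Sketch only; assumes this package is importable as local_dm_control_suite.
import numpy as np
from local_dm_control_suite import reacher

env = reacher.easy(random=0)  # Integer seed for reproducibility.
spec = env.action_spec()
time_step = env.reset()
for _ in range(5):
  action = np.random.uniform(spec.minimum, spec.maximum, size=spec.shape)
  time_step = env.step(action)
  print(time_step.reward, time_step.observation['to_target'])
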
47
Dreamer/local_dm_control_suite/reacher.xml
Executable file
@ -0,0 +1,47 @@
<mujoco model="two-link planar reacher">
  <include file="./common/skybox.xml"/>
  <include file="./common/visual.xml"/>
  <include file="./common/materials.xml"/>

  <option timestep="0.02">
    <flag contact="disable"/>
  </option>

  <default>
    <joint type="hinge" axis="0 0 1" damping="0.01"/>
    <motor gear=".05" ctrlrange="-1 1" ctrllimited="true"/>
  </default>

  <worldbody>
    <light name="light" pos="0 0 1"/>
    <camera name="fixed" pos="0 0 .75" quat="1 0 0 0"/>
    <!-- Arena -->
    <geom name="ground" type="plane" pos="0 0 0" size=".3 .3 10" material="grid"/>
    <geom name="wall_x" type="plane" pos="-.3 0 .02" zaxis="1 0 0" size=".02 .3 .02" material="decoration"/>
    <geom name="wall_y" type="plane" pos="0 -.3 .02" zaxis="0 1 0" size=".3 .02 .02" material="decoration"/>
    <geom name="wall_neg_x" type="plane" pos=".3 0 .02" zaxis="-1 0 0" size=".02 .3 .02" material="decoration"/>
    <geom name="wall_neg_y" type="plane" pos="0 .3 .02" zaxis="0 -1 0" size=".3 .02 .02" material="decoration"/>

    <!-- Arm -->
    <geom name="root" type="cylinder" fromto="0 0 0 0 0 0.02" size=".011" material="decoration"/>
    <body name="arm" pos="0 0 .01">
      <geom name="arm" type="capsule" fromto="0 0 0 0.12 0 0" size=".01" material="self"/>
      <joint name="shoulder"/>
      <body name="hand" pos=".12 0 0">
        <geom name="hand" type="capsule" fromto="0 0 0 0.1 0 0" size=".01" material="self"/>
        <joint name="wrist" limited="true" range="-160 160"/>
        <body name="finger" pos=".12 0 0">
          <camera name="hand" pos="0 0 .2" mode="track"/>
          <geom name="finger" type="sphere" size=".01" material="effector"/>
        </body>
      </body>
    </body>
    <!-- Target -->
    <geom name="target" pos="0 0 .01" material="target" type="sphere" size=".05"/>
  </worldbody>

  <actuator>
    <motor name="shoulder" joint="shoulder"/>
    <motor name="wrist" joint="wrist"/>
  </actuator>
</mujoco>
208
Dreamer/local_dm_control_suite/stacker.py
Executable file
@ -0,0 +1,208 @@
# Copyright 2017 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Planar Stacker domain."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections

from dm_control import mujoco
from dm_control.rl import control
from local_dm_control_suite import base
from local_dm_control_suite import common
from dm_control.utils import containers
from dm_control.utils import rewards
from dm_control.utils import xml_tools

from lxml import etree
import numpy as np


_CLOSE = .01    # (Meters) Distance below which a thing is considered close.
_CONTROL_TIMESTEP = .01  # (Seconds)
_TIME_LIMIT = 10  # (Seconds)
_ARM_JOINTS = ['arm_root', 'arm_shoulder', 'arm_elbow', 'arm_wrist',
               'finger', 'fingertip', 'thumb', 'thumbtip']

SUITE = containers.TaggedTasks()


def make_model(n_boxes):
  """Returns a tuple containing the model XML string and a dict of assets."""
  xml_string = common.read_model('stacker.xml')
  parser = etree.XMLParser(remove_blank_text=True)
  mjcf = etree.XML(xml_string, parser)

  # Remove unused boxes.
  for b in range(n_boxes, 4):
    box = xml_tools.find_element(mjcf, 'body', 'box' + str(b))
    box.getparent().remove(box)

  return etree.tostring(mjcf, pretty_print=True), common.ASSETS


@SUITE.add('hard')
def stack_2(fully_observable=True, time_limit=_TIME_LIMIT, random=None,
            environment_kwargs=None):
  """Returns stacker task with 2 boxes."""
  n_boxes = 2
  physics = Physics.from_xml_string(*make_model(n_boxes=n_boxes))
  task = Stack(n_boxes=n_boxes,
               fully_observable=fully_observable,
               random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit,
      **environment_kwargs)


@SUITE.add('hard')
def stack_4(fully_observable=True, time_limit=_TIME_LIMIT, random=None,
            environment_kwargs=None):
  """Returns stacker task with 4 boxes."""
  n_boxes = 4
  physics = Physics.from_xml_string(*make_model(n_boxes=n_boxes))
  task = Stack(n_boxes=n_boxes,
               fully_observable=fully_observable,
               random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit,
      **environment_kwargs)


class Physics(mujoco.Physics):
  """Physics with additional features for the Planar Manipulator domain."""

  def bounded_joint_pos(self, joint_names):
    """Returns joint positions as (sin, cos) values."""
    joint_pos = self.named.data.qpos[joint_names]
    return np.vstack([np.sin(joint_pos), np.cos(joint_pos)]).T

  def joint_vel(self, joint_names):
    """Returns joint velocities."""
    return self.named.data.qvel[joint_names]

  def body_2d_pose(self, body_names, orientation=True):
    """Returns positions and/or orientations of bodies."""
    if not isinstance(body_names, str):
      body_names = np.array(body_names).reshape(-1, 1)  # Broadcast indices.
    pos = self.named.data.xpos[body_names, ['x', 'z']]
    if orientation:
      ori = self.named.data.xquat[body_names, ['qw', 'qy']]
      return np.hstack([pos, ori])
    else:
      return pos

  def touch(self):
    return np.log1p(self.data.sensordata)

  def site_distance(self, site1, site2):
    site1_to_site2 = np.diff(self.named.data.site_xpos[[site2, site1]], axis=0)
    return np.linalg.norm(site1_to_site2)


class Stack(base.Task):
  """A Stack `Task`: stack the boxes."""

  def __init__(self, n_boxes, fully_observable, random=None):
    """Initialize an instance of the `Stack` task.

    Args:
      n_boxes: An `int`, number of boxes to stack.
      fully_observable: A `bool`, whether the observation should contain the
        positions and velocities of the boxes and the location of the target.
      random: Optional, either a `numpy.random.RandomState` instance, an
        integer seed for creating a new `RandomState`, or None to select a seed
        automatically (default).
    """
    self._n_boxes = n_boxes
    self._box_names = ['box' + str(b) for b in range(n_boxes)]
    self._box_joint_names = []
    for name in self._box_names:
      for dim in 'xyz':
        self._box_joint_names.append('_'.join([name, dim]))
    self._fully_observable = fully_observable
    super(Stack, self).__init__(random=random)

  def initialize_episode(self, physics):
    """Sets the state of the environment at the start of each episode."""
    # Local aliases.
    randint = self.random.randint
    uniform = self.random.uniform
    model = physics.named.model
    data = physics.named.data

    # Find a collision-free random initial configuration.
    penetrating = True
    while penetrating:

      # Randomise angles of arm joints.
      is_limited = model.jnt_limited[_ARM_JOINTS].astype(bool)  # np.bool is deprecated.
      joint_range = model.jnt_range[_ARM_JOINTS]
      lower_limits = np.where(is_limited, joint_range[:, 0], -np.pi)
      upper_limits = np.where(is_limited, joint_range[:, 1], np.pi)
      angles = uniform(lower_limits, upper_limits)
      data.qpos[_ARM_JOINTS] = angles

      # Symmetrize hand.
      data.qpos['finger'] = data.qpos['thumb']

      # Randomise target location.
      target_height = 2*randint(self._n_boxes) + 1
      box_size = model.geom_size['target', 0]
      model.body_pos['target', 'z'] = box_size * target_height
      model.body_pos['target', 'x'] = uniform(-.37, .37)

      # Randomise box locations.
      for name in self._box_names:
        data.qpos[name + '_x'] = uniform(.1, .3)
        data.qpos[name + '_z'] = uniform(0, .7)
        data.qpos[name + '_y'] = uniform(0, 2*np.pi)

      # Check for collisions.
      physics.after_reset()
      penetrating = physics.data.ncon > 0

    super(Stack, self).initialize_episode(physics)

  def get_observation(self, physics):
    """Returns either features or only sensors (to be used with pixels)."""
    obs = collections.OrderedDict()
    obs['arm_pos'] = physics.bounded_joint_pos(_ARM_JOINTS)
    obs['arm_vel'] = physics.joint_vel(_ARM_JOINTS)
    obs['touch'] = physics.touch()
    if self._fully_observable:
      obs['hand_pos'] = physics.body_2d_pose('hand')
      obs['box_pos'] = physics.body_2d_pose(self._box_names)
      obs['box_vel'] = physics.joint_vel(self._box_joint_names)
      obs['target_pos'] = physics.body_2d_pose('target', orientation=False)
    return obs

  def get_reward(self, physics):
    """Returns a reward to the agent."""
    box_size = physics.named.model.geom_size['target', 0]
    min_box_to_target_distance = min(physics.site_distance(name, 'target')
                                     for name in self._box_names)
    box_is_close = rewards.tolerance(min_box_to_target_distance,
                                     margin=2*box_size)
    hand_to_target_distance = physics.site_distance('grasp', 'target')
    hand_is_far = rewards.tolerance(hand_to_target_distance,
                                    bounds=(.1, float('inf')),
                                    margin=_CLOSE)
    return box_is_close * hand_is_far
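
A small sketch (not part of the file) of the box pruning done by `make_model` above: with `n_boxes=2`, the bodies `box2` and `box3`, along with the joints and geoms nested inside them, are stripped from the XML before compilation.

# Sketch only; assumes this package is importable as local_dm_control_suite.
from local_dm_control_suite import stacker

xml_string, _ = stacker.make_model(n_boxes=2)
# etree.tostring returns bytes, hence the byte literals.
assert b'"box1"' in xml_string
assert b'"box3"' not in xml_string
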
193
Dreamer/local_dm_control_suite/stacker.xml
Executable file
@ -0,0 +1,193 @@
<mujoco model="planar stacker">

  <include file="./common/visual.xml"/>
  <include file="./common/skybox.xml"/>
  <include file="./common/materials_white_floor.xml"/>
  <asset>
    <texture name="background" builtin="flat" type="2d" mark="random" markrgb="1 1 1" width="800" height="800" rgb1=".2 .3 .4"/>
    <material name="background" texture="background" texrepeat="1 1" texuniform="true"/>
  </asset>

  <visual>
    <map shadowclip=".5"/>
    <quality shadowsize="2048"/>
  </visual>

  <option timestep="0.001" cone="elliptic"/>

  <default>
    <geom friction=".7" solimp="0.9 0.97 0.001" solref=".01 1"/>
    <joint solimplimit="0 0.99 0.01" solreflimit=".005 1"/>
    <general ctrllimited="true"/>
    <tendon width="0.01"/>
    <site size=".003 .003 .003" material="site" group="3"/>

    <default class="arm">
      <geom type="capsule" material="self" density="500"/>
      <joint type="hinge" pos="0 0 0" axis="0 -1 0" limited="true"/>
      <default class="hand">
        <joint damping=".5" range="-10 60"/>
        <geom size=".008"/>
        <site type="box" size=".018 .005 .005" pos=".022 0 -.002" euler="0 15 0" group="4"/>
        <default class="fingertip">
          <geom type="sphere" size=".008" material="effector"/>
          <joint damping=".01" stiffness=".01" range="-40 20"/>
          <site size=".012 .005 .008" pos=".003 0 .003" group="4" euler="0 0 0"/>
        </default>
      </default>
    </default>

    <default class="object">
      <geom material="self"/>
    </default>

    <default class="task">
      <site rgba="0 0 0 0"/>
    </default>

    <default class="obstacle">
      <geom material="decoration" friction="0"/>
    </default>

    <default class="ghost">
      <geom material="target" contype="0" conaffinity="0"/>
    </default>
  </default>

  <worldbody>
    <!-- Arena -->
    <light name="light" directional="true" diffuse=".6 .6 .6" pos="0 0 1" specular=".3 .3 .3"/>
    <geom name="floor" type="plane" pos="0 0 0" size=".4 .2 10" material="grid"/>
    <geom name="wall1" type="plane" pos="-.682843 0 .282843" size=".4 .2 10" material="grid" zaxis="1 0 1"/>
    <geom name="wall2" type="plane" pos=".682843 0 .282843" size=".4 .2 10" material="grid" zaxis="-1 0 1"/>
    <geom name="background" type="plane" pos="0 .2 .5" size="1 .5 10" material="background" zaxis="0 -1 0"/>
    <camera name="fixed" pos="0 -16 .4" xyaxes="1 0 0 0 0 1" fovy="4"/>

    <!-- Arm -->
    <geom name="arm_root" type="cylinder" fromto="0 -.022 .4 0 .022 .4" size=".024"
          material="decoration" contype="0" conaffinity="0"/>
    <body name="upper_arm" pos="0 0 .4" childclass="arm">
      <joint name="arm_root" damping="2" limited="false"/>
      <geom name="upper_arm" size=".02" fromto="0 0 0 0 0 .18"/>
      <body name="middle_arm" pos="0 0 .18" childclass="arm">
        <joint name="arm_shoulder" damping="1.5" range="-160 160"/>
        <geom name="middle_arm" size=".017" fromto="0 0 0 0 0 .15"/>
        <body name="lower_arm" pos="0 0 .15">
          <joint name="arm_elbow" damping="1" range="-160 160"/>
          <geom name="lower_arm" size=".014" fromto="0 0 0 0 0 .12"/>
          <body name="hand" pos="0 0 .12">
            <joint name="arm_wrist" damping=".5" range="-140 140"/>
            <geom name="hand" size=".011" fromto="0 0 0 0 0 .03"/>
            <geom name="palm1" fromto="0 0 .03 .03 0 .045" class="hand"/>
            <geom name="palm2" fromto="0 0 .03 -.03 0 .045" class="hand"/>
            <site name="grasp" pos="0 0 .065"/>
            <body name="pinch site" pos="0 0 .090">
              <site name="pinch"/>
              <inertial pos="0 0 0" mass="1e-6" diaginertia="1e-12 1e-12 1e-12"/>
              <camera name="hand" pos="0 -.3 0" xyaxes="1 0 0 0 0 1" mode="track"/>
            </body>
            <site name="palm_touch" type="box" group="4" size=".025 .005 .008" pos="0 0 .043"/>

            <body name="thumb" pos=".03 0 .045" euler="0 -90 0" childclass="hand">
              <joint name="thumb"/>
              <geom name="thumb1" fromto="0 0 0 .02 0 -.01" size=".007"/>
              <geom name="thumb2" fromto=".02 0 -.01 .04 0 -.01" size=".007"/>
              <site name="thumb_touch" group="4"/>
              <body name="thumbtip" pos=".05 0 -.01" childclass="fingertip">
                <joint name="thumbtip"/>
                <geom name="thumbtip1" pos="-.003 0 0"/>
                <geom name="thumbtip2" pos=".003 0 0"/>
                <site name="thumbtip_touch" group="4"/>
              </body>
            </body>

            <body name="finger" pos="-.03 0 .045" euler="0 90 180" childclass="hand">
              <joint name="finger"/>
              <geom name="finger1" fromto="0 0 0 .02 0 -.01" size=".007"/>
              <geom name="finger2" fromto=".02 0 -.01 .04 0 -.01" size=".007"/>
              <site name="finger_touch"/>
              <body name="fingertip" pos=".05 0 -.01" childclass="fingertip">
                <joint name="fingertip"/>
                <geom name="fingertip1" pos="-.003 0 0"/>
                <geom name="fingertip2" pos=".003 0 0"/>
                <site name="fingertip_touch"/>
              </body>
            </body>
          </body>
        </body>
      </body>
    </body>

    <!-- props -->
    <body name="box0" pos=".5 0 .4" childclass="object">
      <joint name="box0_x" type="slide" axis="1 0 0" ref=".5"/>
      <joint name="box0_z" type="slide" axis="0 0 1" ref=".4"/>
      <joint name="box0_y" type="hinge" axis="0 1 0"/>
      <geom name="box0" type="box" size=".022 .022 .022"/>
      <site name="box0" type="sphere"/>
    </body>

    <body name="box1" pos=".4 0 .4" childclass="object">
      <joint name="box1_x" type="slide" axis="1 0 0" ref=".4"/>
      <joint name="box1_z" type="slide" axis="0 0 1" ref=".4"/>
      <joint name="box1_y" type="hinge" axis="0 1 0"/>
      <geom name="box1" type="box" size=".022 .022 .022"/>
      <site name="box1" type="sphere"/>
    </body>

    <body name="box2" pos=".3 0 .4" childclass="object">
      <joint name="box2_x" type="slide" axis="1 0 0" ref=".3"/>
      <joint name="box2_z" type="slide" axis="0 0 1" ref=".4"/>
      <joint name="box2_y" type="hinge" axis="0 1 0"/>
      <geom name="box2" type="box" size=".022 .022 .022"/>
      <site name="box2" type="sphere"/>
    </body>

    <body name="box3" pos=".2 0 .4" childclass="object">
      <joint name="box3_x" type="slide" axis="1 0 0" ref=".2"/>
      <joint name="box3_z" type="slide" axis="0 0 1" ref=".4"/>
      <joint name="box3_y" type="hinge" axis="0 1 0"/>
      <geom name="box3" type="box" size=".022 .022 .022"/>
      <site name="box3" type="sphere"/>
    </body>


    <!-- targets -->
    <body name="target" pos="0 .001 .022" childclass="ghost">
      <geom name="target" type="box" size=".022 .022 .022"/>
      <site name="target" type="sphere"/>
    </body>
  </worldbody>

  <tendon>
    <fixed name="grasp">
      <joint joint="thumb" coef=".5"/>
      <joint joint="finger" coef=".5"/>
    </fixed>
    <fixed name="coupling">
      <joint joint="thumb" coef="-.5"/>
      <joint joint="finger" coef=".5"/>
    </fixed>
  </tendon>

  <equality>
    <tendon name="coupling" tendon1="coupling" solimp="0.95 0.99 0.001" solref=".005 .5"/>
  </equality>

  <sensor>
    <touch name="palm_touch" site="palm_touch"/>
    <touch name="finger_touch" site="finger_touch"/>
    <touch name="thumb_touch" site="thumb_touch"/>
    <touch name="fingertip_touch" site="fingertip_touch"/>
    <touch name="thumbtip_touch" site="thumbtip_touch"/>
  </sensor>

  <actuator>
    <motor name="root" joint="arm_root" ctrlrange="-1 1" gear="12"/>
    <motor name="shoulder" joint="arm_shoulder" ctrlrange="-1 1" gear="8"/>
    <motor name="elbow" joint="arm_elbow" ctrlrange="-1 1" gear="4"/>
    <motor name="wrist" joint="arm_wrist" ctrlrange="-1 1" gear="2"/>
    <motor name="grasp" tendon="grasp" ctrlrange="-1 1" gear="2"/>
  </actuator>

</mujoco>
215
Dreamer/local_dm_control_suite/swimmer.py
Executable file
@ -0,0 +1,215 @@
# Copyright 2017 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Procedurally generated Swimmer domain."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections

from dm_control import mujoco
from dm_control.rl import control
from local_dm_control_suite import base
from local_dm_control_suite import common
from dm_control.suite.utils import randomizers
from dm_control.utils import containers
from dm_control.utils import rewards
from lxml import etree
import numpy as np
from six.moves import range

_DEFAULT_TIME_LIMIT = 30
_CONTROL_TIMESTEP = .03  # (Seconds)

SUITE = containers.TaggedTasks()


def get_model_and_assets(n_joints):
  """Returns a tuple containing the model XML string and a dict of assets.

  Args:
    n_joints: An integer specifying the number of joints in the swimmer.

  Returns:
    A tuple `(model_xml_string, assets)`, where `assets` is a dict consisting of
    `{filename: contents_string}` pairs.
  """
  return _make_model(n_joints), common.ASSETS


@SUITE.add('benchmarking')
def swimmer6(time_limit=_DEFAULT_TIME_LIMIT, random=None,
             environment_kwargs=None):
  """Returns a 6-link swimmer."""
  return _make_swimmer(6, time_limit, random=random,
                       environment_kwargs=environment_kwargs)


@SUITE.add('benchmarking')
def swimmer15(time_limit=_DEFAULT_TIME_LIMIT, random=None,
              environment_kwargs=None):
  """Returns a 15-link swimmer."""
  return _make_swimmer(15, time_limit, random=random,
                       environment_kwargs=environment_kwargs)


def swimmer(n_links=3, time_limit=_DEFAULT_TIME_LIMIT,
            random=None, environment_kwargs=None):
  """Returns a swimmer with n links."""
  return _make_swimmer(n_links, time_limit, random=random,
                       environment_kwargs=environment_kwargs)


def _make_swimmer(n_joints, time_limit=_DEFAULT_TIME_LIMIT, random=None,
                  environment_kwargs=None):
  """Returns a swimmer control environment."""
  model_string, assets = get_model_and_assets(n_joints)
  physics = Physics.from_xml_string(model_string, assets=assets)
  task = Swimmer(random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
      **environment_kwargs)


def _make_model(n_bodies):
  """Generates an xml string defining a swimmer with `n_bodies` bodies."""
  if n_bodies < 3:
    raise ValueError('At least 3 bodies required. Received {}'.format(n_bodies))
  mjcf = etree.fromstring(common.read_model('swimmer.xml'))
  head_body = mjcf.find('./worldbody/body')
  actuator = etree.SubElement(mjcf, 'actuator')
  sensor = etree.SubElement(mjcf, 'sensor')

  parent = head_body
  for body_index in range(n_bodies - 1):
    site_name = 'site_{}'.format(body_index)
    child = _make_body(body_index=body_index)
    child.append(etree.Element('site', name=site_name))
    joint_name = 'joint_{}'.format(body_index)
    joint_limit = 360.0/n_bodies
    joint_range = '{} {}'.format(-joint_limit, joint_limit)
    child.append(etree.Element('joint', {'name': joint_name,
                                         'range': joint_range}))
    motor_name = 'motor_{}'.format(body_index)
    actuator.append(etree.Element('motor', name=motor_name, joint=joint_name))
    velocimeter_name = 'velocimeter_{}'.format(body_index)
    sensor.append(etree.Element('velocimeter', name=velocimeter_name,
                                site=site_name))
    gyro_name = 'gyro_{}'.format(body_index)
    sensor.append(etree.Element('gyro', name=gyro_name, site=site_name))
    parent.append(child)
    parent = child

  # Move tracking cameras further away from the swimmer according to its length.
  cameras = mjcf.findall('./worldbody/body/camera')
  scale = n_bodies / 6.0
  for cam in cameras:
    if cam.get('mode') == 'trackcom':
      old_pos = cam.get('pos').split(' ')
      new_pos = ' '.join([str(float(dim) * scale) for dim in old_pos])
      cam.set('pos', new_pos)

  return etree.tostring(mjcf, pretty_print=True)


def _make_body(body_index):
  """Generates an xml string defining a single physical body."""
  body_name = 'segment_{}'.format(body_index)
  visual_name = 'visual_{}'.format(body_index)
  inertial_name = 'inertial_{}'.format(body_index)
  body = etree.Element('body', name=body_name)
  body.set('pos', '0 .1 0')
  etree.SubElement(body, 'geom', {'class': 'visual', 'name': visual_name})
  etree.SubElement(body, 'geom', {'class': 'inertial', 'name': inertial_name})
  return body


class Physics(mujoco.Physics):
  """Physics simulation with additional features for the swimmer domain."""

  def nose_to_target(self):
    """Returns a vector from nose to target in local coordinates of the head."""
    nose_to_target = (self.named.data.geom_xpos['target'] -
                      self.named.data.geom_xpos['nose'])
    head_orientation = self.named.data.xmat['head'].reshape(3, 3)
    return nose_to_target.dot(head_orientation)[:2]

  def nose_to_target_dist(self):
    """Returns the distance from the nose to the target."""
    return np.linalg.norm(self.nose_to_target())

  def body_velocities(self):
    """Returns local body velocities: x,y linear, z rotational."""
    xvel_local = self.data.sensordata[12:].reshape((-1, 6))
    vx_vy_wz = [0, 1, 5]  # Indices for linear x,y vels and rotational z vel.
    return xvel_local[:, vx_vy_wz].ravel()

  def joints(self):
    """Returns all internal joint angles (excluding root joints)."""
    return self.data.qpos[3:].copy()


class Swimmer(base.Task):
  """A swimmer `Task` to reach the target or just swim."""

  def __init__(self, random=None):
    """Initializes an instance of `Swimmer`.

    Args:
      random: Optional, either a `numpy.random.RandomState` instance, an
        integer seed for creating a new `RandomState`, or None to select a seed
        automatically (default).
    """
    super(Swimmer, self).__init__(random=random)

  def initialize_episode(self, physics):
    """Sets the state of the environment at the start of each episode.

    Initializes the swimmer orientation to [-pi, pi) and the relative joint
    angle of each joint uniformly within its range.

    Args:
      physics: An instance of `Physics`.
    """
    # Random joint angles:
    randomizers.randomize_limited_and_rotational_joints(physics, self.random)
    # Random target position.
    close_target = self.random.rand() < .2  # Probability of a close target.
    target_box = .3 if close_target else 2
    xpos, ypos = self.random.uniform(-target_box, target_box, size=2)
    physics.named.model.geom_pos['target', 'x'] = xpos
    physics.named.model.geom_pos['target', 'y'] = ypos
    physics.named.model.light_pos['target_light', 'x'] = xpos
    physics.named.model.light_pos['target_light', 'y'] = ypos

    super(Swimmer, self).initialize_episode(physics)

  def get_observation(self, physics):
    """Returns an observation of joint angles, body velocities and target."""
    obs = collections.OrderedDict()
    obs['joints'] = physics.joints()
    obs['to_target'] = physics.nose_to_target()
    obs['body_velocities'] = physics.body_velocities()
    return obs

  def get_reward(self, physics):
    """Returns a smooth reward."""
    target_size = physics.named.model.geom_size['target', 0]
    return rewards.tolerance(physics.nose_to_target_dist(),
                             bounds=(0, target_size),
                             margin=5*target_size,
                             sigmoid='long_tail')
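
The generator above accepts any chain length of at least three; a standalone sketch (assuming the package import path):

# Sketch only. An n-link chain gets n-1 actuated joints from _make_model,
# so an 8-link swimmer exposes a 7-dimensional action spec.
from local_dm_control_suite import swimmer

env = swimmer.swimmer(n_links=8, random=0)
print(env.action_spec().shape)  # (7,)
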
57
Dreamer/local_dm_control_suite/swimmer.xml
Executable file
@ -0,0 +1,57 @@
<mujoco model="swimmer">
  <include file="./common/visual.xml"/>
  <include file="./common/skybox.xml"/>
  <include file="./common/materials.xml"/>

  <option timestep="0.002" density="3000">
    <flag contact="disable"/>
  </option>

  <default>
    <default class="swimmer">
      <joint type="hinge" pos="0 -.05 0" axis="0 0 1" limited="true" solreflimit=".05 1" solimplimit="0 .8 .1" armature="1e-6"/>
      <default class="inertial">
        <geom type="box" size=".001 .05 .01" rgba="0 0 0 0" mass=".01"/>
      </default>
      <default class="visual">
        <geom type="capsule" size=".01" fromto="0 -.05 0 0 .05 0" material="self" mass="0"/>
      </default>
      <site size=".01" rgba="0 0 0 0"/>
    </default>
    <default class="free">
      <joint limited="false" stiffness="0" armature="0"/>
    </default>
    <motor gear="5e-4" ctrllimited="true" ctrlrange="-1 1"/>
  </default>

  <worldbody>
    <geom name="ground" type="plane" size="2 2 0.1" material="grid"/>
    <body name="head" pos="0 0 .05" childclass="swimmer">
      <light name="light_1" diffuse=".8 .8 .8" pos="0 0 1.5"/>
      <geom name="head" type="ellipsoid" size=".02 .04 .017" pos="0 -.022 0" material="self" mass="0"/>
      <geom name="nose" type="sphere" pos="0 -.06 0" size=".004" material="effector" mass="0"/>
      <geom name="eyes" type="capsule" fromto="-.006 -.054 .005 .006 -.054 .005" size=".004" material="eye" mass="0"/>
      <camera name="tracking1" pos="0 -.2 .5" xyaxes="1 0 0 0 1 1" mode="trackcom" fovy="60"/>
      <camera name="tracking2" pos="-.9 .5 .15" xyaxes="0 -1 0 .3 0 1" mode="trackcom" fovy="60"/>
      <camera name="eyes" pos="0 -.058 .005" xyaxes="-1 0 0 0 0 1"/>
      <joint name="rootx" class="free" type="slide" axis="1 0 0" pos="0 -.05 0"/>
      <joint name="rooty" class="free" type="slide" axis="0 1 0" pos="0 -.05 0"/>
      <joint name="rootz" class="free" type="hinge" axis="0 0 1" pos="0 -.05 0"/>
      <geom name="inertial" class="inertial"/>
      <geom name="visual" class="visual"/>
      <site name="head"/>
    </body>
    <geom name="target" type="sphere" pos="1 1 .05" size=".1" material="target"/>
    <light name="target_light" diffuse="1 1 1" pos="1 1 1.5"/>
  </worldbody>

  <sensor>
    <framepos name="nose_pos" objtype="geom" objname="nose"/>
    <framepos name="target_pos" objtype="geom" objname="target"/>
    <framexaxis name="head_xaxis" objtype="xbody" objname="head"/>
    <frameyaxis name="head_yaxis" objtype="xbody" objname="head"/>
    <velocimeter name="head_vel" site="head"/>
    <gyro name="head_gyro" site="head"/>
  </sensor>
</mujoco>
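The sensor block above exposes the nose and target world positions plus the head axes, which the swimmer `Physics` class turns into the `to_target` observation. A minimal sketch of reading these sensors directly (this assumes dm_control's named indexing over `sensordata`; it is not part of the file itself):

import numpy as np
from dm_control import mujoco
from local_dm_control_suite import common

# Load swimmer.xml together with the shared assets it includes.
physics = mujoco.Physics.from_xml_string(
    common.read_model('swimmer.xml'), common.ASSETS)
sensordata = physics.named.data.sensordata
# World-frame vector from nose to target, read off the framepos sensors
# declared in the XML above.
nose_to_target = sensordata['target_pos'] - sensordata['nose_pos']
print(np.linalg.norm(nose_to_target))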
292
Dreamer/local_dm_control_suite/tests/domains_test.py
Executable file
@ -0,0 +1,292 @@
# Copyright 2017 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Tests for dm_control.suite domains."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# Internal dependencies.
from absl.testing import absltest
from absl.testing import parameterized
from dm_control import suite
from dm_control.rl import control
import mock
import numpy as np
import six
from six.moves import range
from six.moves import zip


def uniform_random_policy(action_spec, random=None):
  lower_bounds = action_spec.minimum
  upper_bounds = action_spec.maximum
  # Draw values between -1 and 1 for unbounded actions.
  lower_bounds = np.where(np.isinf(lower_bounds), -1.0, lower_bounds)
  upper_bounds = np.where(np.isinf(upper_bounds), 1.0, upper_bounds)
  random_state = np.random.RandomState(random)
  def policy(time_step):
    del time_step  # Unused.
    return random_state.uniform(lower_bounds, upper_bounds)
  return policy


def step_environment(env, policy, num_episodes=5, max_steps_per_episode=10):
  for _ in range(num_episodes):
    step_count = 0
    time_step = env.reset()
    yield time_step
    while not time_step.last():
      action = policy(time_step)
      time_step = env.step(action)
      step_count += 1
      yield time_step
      if step_count >= max_steps_per_episode:
        break


def make_trajectory(domain, task, seed, **trajectory_kwargs):
  env = suite.load(domain, task, task_kwargs={'random': seed})
  policy = uniform_random_policy(env.action_spec(), random=seed)
  return step_environment(env, policy, **trajectory_kwargs)


class DomainTest(parameterized.TestCase):
  """Tests run on all the tasks registered."""

  def test_constants(self):
    num_tasks = sum(len(tasks) for tasks in
                    six.itervalues(suite.TASKS_BY_DOMAIN))

    self.assertLen(suite.ALL_TASKS, num_tasks)

  def _validate_observation(self, observation_dict, observation_spec):
    obs = observation_dict.copy()
    for name, spec in six.iteritems(observation_spec):
      arr = obs.pop(name)
      self.assertEqual(arr.shape, spec.shape)
      self.assertEqual(arr.dtype, spec.dtype)
      self.assertTrue(
          np.all(np.isfinite(arr)),
          msg='{!r} has non-finite value(s): {!r}'.format(name, arr))
    self.assertEmpty(
        obs,
        msg='Observation contains arrays(s) that are not in the spec: {!r}'
        .format(obs))

  def _validate_reward_range(self, time_step):
    if time_step.first():
      self.assertIsNone(time_step.reward)
    else:
      self.assertIsInstance(time_step.reward, float)
      self.assertBetween(time_step.reward, 0, 1)

  def _validate_discount(self, time_step):
    if time_step.first():
      self.assertIsNone(time_step.discount)
    else:
      self.assertIsInstance(time_step.discount, float)
      self.assertBetween(time_step.discount, 0, 1)

  def _validate_control_range(self, lower_bounds, upper_bounds):
    for b in lower_bounds:
      self.assertEqual(b, -1.0)
    for b in upper_bounds:
      self.assertEqual(b, 1.0)

  @parameterized.parameters(*suite.ALL_TASKS)
  def test_components_have_names(self, domain, task):
    env = suite.load(domain, task)
    model = env.physics.model

    object_types_and_size_fields = [
        ('body', 'nbody'),
        ('joint', 'njnt'),
        ('geom', 'ngeom'),
        ('site', 'nsite'),
        ('camera', 'ncam'),
        ('light', 'nlight'),
        ('mesh', 'nmesh'),
        ('hfield', 'nhfield'),
        ('texture', 'ntex'),
        ('material', 'nmat'),
        ('equality', 'neq'),
        ('tendon', 'ntendon'),
        ('actuator', 'nu'),
        ('sensor', 'nsensor'),
        ('numeric', 'nnumeric'),
        ('text', 'ntext'),
        ('tuple', 'ntuple'),
    ]
    for object_type, size_field in object_types_and_size_fields:
      for idx in range(getattr(model, size_field)):
        object_name = model.id2name(idx, object_type)
        self.assertNotEqual(object_name, '',
                            msg='Model {!r} contains unnamed {!r} with ID {}.'
                            .format(model.name, object_type, idx))

  @parameterized.parameters(*suite.ALL_TASKS)
  def test_model_has_at_least_2_cameras(self, domain, task):
    env = suite.load(domain, task)
    model = env.physics.model
    self.assertGreaterEqual(model.ncam, 2,
                            'Model {!r} should have at least 2 cameras, has {}.'
                            .format(model.name, model.ncam))

  @parameterized.parameters(*suite.ALL_TASKS)
  def test_task_conforms_to_spec(self, domain, task):
    """Tests that the environment timesteps conform to specifications."""
    is_benchmark = (domain, task) in suite.BENCHMARKING
    env = suite.load(domain, task)
    observation_spec = env.observation_spec()
    action_spec = env.action_spec()

    # Check action bounds.
    if is_benchmark:
      self._validate_control_range(action_spec.minimum, action_spec.maximum)

    # Step through the environment, applying random actions sampled within the
    # valid range and check the observations, rewards, and discounts.
    policy = uniform_random_policy(action_spec)
    for time_step in step_environment(env, policy):
      self._validate_observation(time_step.observation, observation_spec)
      self._validate_discount(time_step)
      if is_benchmark:
        self._validate_reward_range(time_step)

  @parameterized.parameters(*suite.ALL_TASKS)
  def test_environment_is_deterministic(self, domain, task):
    """Tests that identical seeds and actions produce identical trajectories."""
    seed = 0
    # Iterate over two trajectories generated using identical sequences of
    # random actions, and with identical task random states. Check that the
    # observations, rewards, discounts and step types are identical.
    trajectory1 = make_trajectory(domain=domain, task=task, seed=seed)
    trajectory2 = make_trajectory(domain=domain, task=task, seed=seed)
    for time_step1, time_step2 in zip(trajectory1, trajectory2):
      self.assertEqual(time_step1.step_type, time_step2.step_type)
      self.assertEqual(time_step1.reward, time_step2.reward)
      self.assertEqual(time_step1.discount, time_step2.discount)
      for key in six.iterkeys(time_step1.observation):
        np.testing.assert_array_equal(
            time_step1.observation[key], time_step2.observation[key],
            err_msg='Observation {!r} is not equal.'.format(key))

  def assertCorrectColors(self, physics, reward):
    colors = physics.named.model.mat_rgba
    for material_name in ('self', 'effector', 'target'):
      highlight = colors[material_name + '_highlight']
      default = colors[material_name + '_default']
      blend_coef = reward ** 4
      expected = blend_coef * highlight + (1.0 - blend_coef) * default
      actual = colors[material_name]
      err_msg = ('Material {!r} has unexpected color.\nExpected: {!r}\n'
                 'Actual: {!r}'.format(material_name, expected, actual))
      np.testing.assert_array_almost_equal(expected, actual, err_msg=err_msg)

  @parameterized.parameters(*suite.ALL_TASKS)
  def test_visualize_reward(self, domain, task):
    env = suite.load(domain, task)
    env.task.visualize_reward = True
    action = np.zeros(env.action_spec().shape)

    with mock.patch.object(env.task, 'get_reward') as mock_get_reward:
      mock_get_reward.return_value = -3.0  # Rewards < 0 should be clipped.
      env.reset()
      mock_get_reward.assert_called_with(env.physics)
      self.assertCorrectColors(env.physics, reward=0.0)

      mock_get_reward.reset_mock()
      mock_get_reward.return_value = 0.5
      env.step(action)
      mock_get_reward.assert_called_with(env.physics)
      self.assertCorrectColors(env.physics, reward=mock_get_reward.return_value)

      mock_get_reward.reset_mock()
      mock_get_reward.return_value = 2.0  # Rewards > 1 should be clipped.
      env.step(action)
      mock_get_reward.assert_called_with(env.physics)
      self.assertCorrectColors(env.physics, reward=1.0)

      mock_get_reward.reset_mock()
      mock_get_reward.return_value = 0.25
      env.reset()
      mock_get_reward.assert_called_with(env.physics)
      self.assertCorrectColors(env.physics, reward=mock_get_reward.return_value)

  @parameterized.parameters(*suite.ALL_TASKS)
  def test_task_supports_environment_kwargs(self, domain, task):
    env = suite.load(domain, task,
                     environment_kwargs=dict(flat_observation=True))
    # Check that the kwargs are actually passed through to the environment.
    self.assertSetEqual(set(env.observation_spec()),
                        {control.FLAT_OBSERVATION_KEY})

  @parameterized.parameters(*suite.ALL_TASKS)
  def test_observation_arrays_dont_share_memory(self, domain, task):
    env = suite.load(domain, task)
    first_timestep = env.reset()
    action = np.zeros(env.action_spec().shape)
    second_timestep = env.step(action)
    for name, first_array in six.iteritems(first_timestep.observation):
      second_array = second_timestep.observation[name]
      self.assertFalse(
          np.may_share_memory(first_array, second_array),
          msg='Consecutive observations of {!r} may share memory.'.format(name))

  @parameterized.parameters(*suite.ALL_TASKS)
  def test_observations_dont_contain_constant_elements(self, domain, task):
    env = suite.load(domain, task)
    trajectory = make_trajectory(domain=domain, task=task, seed=0,
                                 num_episodes=2, max_steps_per_episode=1000)
    observations = {name: [] for name in env.observation_spec()}
    for time_step in trajectory:
      for name, array in six.iteritems(time_step.observation):
        observations[name].append(array)

    failures = []

    for name, array_list in six.iteritems(observations):
      # Sampling random uniform actions generally isn't sufficient to trigger
      # these touch sensors.
      if (domain in ('manipulator', 'stacker') and name == 'touch' or
          domain == 'quadruped' and name == 'force_torque'):
        continue
      stacked_arrays = np.array(array_list)
      is_constant = np.all(stacked_arrays == stacked_arrays[0], axis=0)
      has_constant_elements = (
          is_constant if np.isscalar(is_constant) else np.any(is_constant))
      if has_constant_elements:
        failures.append((name, is_constant))

    self.assertEmpty(
        failures,
        msg='The following observation(s) contain constant elements:\n{}'
        .format('\n'.join(':\t'.join([name, str(is_constant)])
                          for (name, is_constant) in failures)))

  @parameterized.parameters(*suite.ALL_TASKS)
  def test_initial_state_is_randomized(self, domain, task):
    env = suite.load(domain, task, task_kwargs={'random': 42})
    obs1 = env.reset().observation
    obs2 = env.reset().observation
    self.assertFalse(
        all(np.all(obs1[k] == obs2[k]) for k in obs1),
        'Two consecutive initial states have identical observations.\n'
        'First: {}\nSecond: {}'.format(obs1, obs2))


if __name__ == '__main__':
  absltest.main()
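`step_environment` above is a generator, so transitions are produced lazily; `make_trajectory` just wires it to a freshly seeded environment and matching policy. A minimal usage sketch of the same helpers outside the test class (domain and task names here are illustrative):

# Roll out one short cartpole episode with uniform random actions and
# print each transition as it is yielded.
for time_step in make_trajectory('cartpole', 'swingup', seed=0,
                                 num_episodes=1, max_steps_per_episode=5):
  print(time_step.step_type, time_step.reward, time_step.discount)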
52
Dreamer/local_dm_control_suite/tests/loader_test.py
Executable file
@ -0,0 +1,52 @@
# Copyright 2017 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Tests for the dm_control.suite loader."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# Internal dependencies.

from absl.testing import absltest

from dm_control import suite
from dm_control.rl import control


class LoaderTest(absltest.TestCase):

  def test_load_without_kwargs(self):
    env = suite.load('cartpole', 'swingup')
    self.assertIsInstance(env, control.Environment)

  def test_load_with_kwargs(self):
    env = suite.load('cartpole', 'swingup',
                     task_kwargs={'time_limit': 40, 'random': 99})
    self.assertIsInstance(env, control.Environment)


class LoaderConstantsTest(absltest.TestCase):

  def testSuiteConstants(self):
    self.assertNotEmpty(suite.BENCHMARKING)
    self.assertNotEmpty(suite.EASY)
    self.assertNotEmpty(suite.HARD)
    self.assertNotEmpty(suite.EXTRA)


if __name__ == '__main__':
  absltest.main()
88
Dreamer/local_dm_control_suite/tests/lqr_test.py
Executable file
@ -0,0 +1,88 @@
# Copyright 2017 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Tests specific to the LQR domain."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import unittest

# Internal dependencies.
from absl import logging

from absl.testing import absltest
from absl.testing import parameterized

from local_dm_control_suite import lqr
from local_dm_control_suite import lqr_solver

import numpy as np
from six.moves import range


class LqrTest(parameterized.TestCase):

  @parameterized.named_parameters(
      ('lqr_2_1', lqr.lqr_2_1),
      ('lqr_6_2', lqr.lqr_6_2))
  def test_lqr_optimal_policy(self, make_env):
    env = make_env()
    p, k, beta = lqr_solver.solve(env)
    self.assertPolicyisOptimal(env, p, k, beta)

  @parameterized.named_parameters(
      ('lqr_2_1', lqr.lqr_2_1),
      ('lqr_6_2', lqr.lqr_6_2))
  @unittest.skipUnless(
      condition=lqr_solver.sp,
      reason='scipy is not available, so non-scipy DARE solver is the default.')
  def test_lqr_optimal_policy_no_scipy(self, make_env):
    env = make_env()
    old_sp = lqr_solver.sp
    try:
      lqr_solver.sp = None  # Force the solver to use the non-scipy code path.
      p, k, beta = lqr_solver.solve(env)
    finally:
      lqr_solver.sp = old_sp
    self.assertPolicyisOptimal(env, p, k, beta)

  def assertPolicyisOptimal(self, env, p, k, beta):
    tolerance = 1e-3
    n_steps = int(math.ceil(math.log10(tolerance) / math.log10(beta)))
    logging.info('%d timesteps for %g convergence.', n_steps, tolerance)
    total_loss = 0.0

    timestep = env.reset()
    initial_state = np.hstack((timestep.observation['position'],
                               timestep.observation['velocity']))
    logging.info('Measuring total cost over %d steps.', n_steps)
    for _ in range(n_steps):
      x = np.hstack((timestep.observation['position'],
                     timestep.observation['velocity']))
      # u = k*x is the optimal policy
      u = k.dot(x)
      total_loss += 1 - (timestep.reward or 0.0)
      timestep = env.step(u)

    logging.info('Analytical expected total cost is .5*x^T*p*x.')
    expected_loss = .5 * initial_state.T.dot(p).dot(initial_state)
    logging.info('Comparing measured and predicted costs.')
    np.testing.assert_allclose(expected_loss, total_loss, rtol=tolerance)


if __name__ == '__main__':
  absltest.main()
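`assertPolicyisOptimal` truncates the rollout once the remaining geometrically decaying cost is below `tolerance`: since the per-step cost shrinks by a factor of `beta`, `n_steps` is the smallest n with beta**n <= tolerance. A small numeric sketch (the value of beta here is hypothetical; the test gets it from the solver):

import math

tolerance = 1e-3
beta = 0.9  # Hypothetical convergence rate returned by lqr_solver.solve.
n_steps = int(math.ceil(math.log10(tolerance) / math.log10(beta)))
print(n_steps)  # 66, since 0.9 ** 66 is just below 1e-3.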
16
Dreamer/local_dm_control_suite/utils/__init__.py
Executable file
@ -0,0 +1,16 @@
# Copyright 2017 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Utility functions used in the control suite."""
251
Dreamer/local_dm_control_suite/utils/parse_amc.py
Executable file
@ -0,0 +1,251 @@
# Copyright 2017 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Parse and convert amc motion capture data."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections

from dm_control.mujoco.wrapper import mjbindings
import numpy as np
from scipy import interpolate
from six.moves import range

mjlib = mjbindings.mjlib

MOCAP_DT = 1.0/120.0
CONVERSION_LENGTH = 0.056444

_CMU_MOCAP_JOINT_ORDER = (
    'root0', 'root1', 'root2', 'root3', 'root4', 'root5', 'lowerbackrx',
    'lowerbackry', 'lowerbackrz', 'upperbackrx', 'upperbackry', 'upperbackrz',
    'thoraxrx', 'thoraxry', 'thoraxrz', 'lowerneckrx', 'lowerneckry',
    'lowerneckrz', 'upperneckrx', 'upperneckry', 'upperneckrz', 'headrx',
    'headry', 'headrz', 'rclaviclery', 'rclaviclerz', 'rhumerusrx',
    'rhumerusry', 'rhumerusrz', 'rradiusrx', 'rwristry', 'rhandrx', 'rhandrz',
    'rfingersrx', 'rthumbrx', 'rthumbrz', 'lclaviclery', 'lclaviclerz',
    'lhumerusrx', 'lhumerusry', 'lhumerusrz', 'lradiusrx', 'lwristry',
    'lhandrx', 'lhandrz', 'lfingersrx', 'lthumbrx', 'lthumbrz', 'rfemurrx',
    'rfemurry', 'rfemurrz', 'rtibiarx', 'rfootrx', 'rfootrz', 'rtoesrx',
    'lfemurrx', 'lfemurry', 'lfemurrz', 'ltibiarx', 'lfootrx', 'lfootrz',
    'ltoesrx'
)

Converted = collections.namedtuple('Converted',
                                   ['qpos', 'qvel', 'time'])


def convert(file_name, physics, timestep):
  """Converts the parsed .amc values into qpos and qvel values and resamples.

  Args:
    file_name: The .amc file to be parsed and converted.
    physics: The corresponding physics instance.
    timestep: Desired output interval between resampled frames.

  Returns:
    A namedtuple with fields:
        `qpos`, a numpy array containing converted positional variables.
        `qvel`, a numpy array containing converted velocity variables.
        `time`, a numpy array containing the corresponding times.
  """
  frame_values = parse(file_name)
  joint2index = {}
  for name in physics.named.data.qpos.axes.row.names:
    joint2index[name] = physics.named.data.qpos.axes.row.convert_key_item(name)
  index2joint = {}
  for joint, index in joint2index.items():
    if isinstance(index, slice):
      indices = range(index.start, index.stop)
    else:
      indices = [index]
    for ii in indices:
      index2joint[ii] = joint

  # Convert frame_values to qpos
  amcvals2qpos_transformer = Amcvals2qpos(index2joint, _CMU_MOCAP_JOINT_ORDER)
  qpos_values = []
  for frame_value in frame_values:
    qpos_values.append(amcvals2qpos_transformer(frame_value))
  qpos_values = np.stack(qpos_values)  # Time by nq

  # Interpolate/resample.
  # Note: interpolate quaternions rather than euler angles (slerp).
  # see https://en.wikipedia.org/wiki/Slerp
  qpos_values_resampled = []
  time_vals = np.arange(0, len(frame_values)*MOCAP_DT - 1e-8, MOCAP_DT)
  time_vals_new = np.arange(0, len(frame_values)*MOCAP_DT, timestep)
  while time_vals_new[-1] > time_vals[-1]:
    time_vals_new = time_vals_new[:-1]

  for i in range(qpos_values.shape[1]):
    f = interpolate.splrep(time_vals, qpos_values[:, i])
    qpos_values_resampled.append(interpolate.splev(time_vals_new, f))

  qpos_values_resampled = np.stack(qpos_values_resampled)  # nq by ntime

  qvel_list = []
  for t in range(qpos_values_resampled.shape[1]-1):
    p_tp1 = qpos_values_resampled[:, t + 1]
    p_t = qpos_values_resampled[:, t]
    qvel = [(p_tp1[:3]-p_t[:3])/ timestep,
            mj_quat2vel(mj_quatdiff(p_t[3:7], p_tp1[3:7]), timestep),
            (p_tp1[7:]-p_t[7:])/ timestep]
    qvel_list.append(np.concatenate(qvel))

  qvel_values_resampled = np.vstack(qvel_list).T

  return Converted(qpos_values_resampled, qvel_values_resampled, time_vals_new)


def parse(file_name):
  """Parses the amc file format."""
  values = []
  fid = open(file_name, 'r')
  line = fid.readline().strip()
  frame_ind = 1
  first_frame = True
  while True:
    # Parse first frame.
    if first_frame and line[0] == str(frame_ind):
      first_frame = False
      frame_ind += 1
      frame_vals = []
      while True:
        line = fid.readline().strip()
        if not line or line == str(frame_ind):
          values.append(np.array(frame_vals, dtype=float))
          break
        tokens = line.split()
        frame_vals.extend(tokens[1:])
    # Parse other frames.
    elif line == str(frame_ind):
      frame_ind += 1
      frame_vals = []
      while True:
        line = fid.readline().strip()
        if not line or line == str(frame_ind):
          values.append(np.array(frame_vals, dtype=float))
          break
        tokens = line.split()
        frame_vals.extend(tokens[1:])
    else:
      line = fid.readline().strip()
      if not line:
        break
  return values


class Amcvals2qpos(object):
  """Callable that converts .amc values for a frame to MuJoCo qpos format."""

  def __init__(self, index2joint, joint_order):
    """Initializes a new Amcvals2qpos instance.

    Args:
      index2joint: Mapping from qpos indices to joint names.
      joint_order: List of joint names in the MuJoCo MJCF model.
    """
    # Root is x,y,z, then quat.
    # need to get indices of qpos that order for amc default order
    self.qpos_root_xyz_ind = [0, 1, 2]
    self.root_xyz_transform = np.array(
        [[1, 0, 0], [0, 0, -1], [0, 1, 0]]) * CONVERSION_LENGTH
    self.qpos_root_quat_ind = [3, 4, 5, 6]
    amc2qpos_transform = np.zeros((len(index2joint), len(joint_order)))
    for i in range(len(index2joint)):
      for j in range(len(joint_order)):
        if index2joint[i] == joint_order[j]:
          if 'rx' in index2joint[i]:
            amc2qpos_transform[i][j] = 1
          elif 'ry' in index2joint[i]:
            amc2qpos_transform[i][j] = 1
          elif 'rz' in index2joint[i]:
            amc2qpos_transform[i][j] = 1
    self.amc2qpos_transform = amc2qpos_transform

  def __call__(self, amc_val):
    """Converts a `.amc` frame to MuJoCo qpos format."""
    amc_val_rad = np.deg2rad(amc_val)
    qpos = np.dot(self.amc2qpos_transform, amc_val_rad)

    # Root.
    qpos[:3] = np.dot(self.root_xyz_transform, amc_val[:3])
    qpos_quat = euler2quat(amc_val[3], amc_val[4], amc_val[5])
    qpos_quat = mj_quatprod(euler2quat(90, 0, 0), qpos_quat)

    for i, ind in enumerate(self.qpos_root_quat_ind):
      qpos[ind] = qpos_quat[i]

    return qpos


def euler2quat(ax, ay, az):
  """Converts euler angles to a quaternion.

  Note: rotation order is zyx

  Args:
    ax: Roll angle (deg).
    ay: Pitch angle (deg).
    az: Yaw angle (deg).

  Returns:
    A numpy array representing the rotation as a quaternion.
  """
  r1 = az
  r2 = ay
  r3 = ax

  c1 = np.cos(np.deg2rad(r1 / 2))
  s1 = np.sin(np.deg2rad(r1 / 2))
  c2 = np.cos(np.deg2rad(r2 / 2))
  s2 = np.sin(np.deg2rad(r2 / 2))
  c3 = np.cos(np.deg2rad(r3 / 2))
  s3 = np.sin(np.deg2rad(r3 / 2))

  q0 = c1 * c2 * c3 + s1 * s2 * s3
  q1 = c1 * c2 * s3 - s1 * s2 * c3
  q2 = c1 * s2 * c3 + s1 * c2 * s3
  q3 = s1 * c2 * c3 - c1 * s2 * s3

  return np.array([q0, q1, q2, q3])


def mj_quatprod(q, r):
  quaternion = np.zeros(4)
  mjlib.mju_mulQuat(quaternion, np.ascontiguousarray(q),
                    np.ascontiguousarray(r))
  return quaternion


def mj_quat2vel(q, dt):
  vel = np.zeros(3)
  mjlib.mju_quat2Vel(vel, np.ascontiguousarray(q), dt)
  return vel


def mj_quatneg(q):
  quaternion = np.zeros(4)
  mjlib.mju_negQuat(quaternion, np.ascontiguousarray(q))
  return quaternion


def mj_quatdiff(source, target):
  return mj_quatprod(mj_quatneg(source), np.ascontiguousarray(target))
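A minimal usage sketch of the converter above, assuming a humanoid_CMU environment and a local .amc clip (the file name below is hypothetical):

from local_dm_control_suite import humanoid_CMU
from local_dm_control_suite.utils import parse_amc

env = humanoid_CMU.stand()
# Resample the clip at the environment's control timestep.
converted = parse_amc.convert('walk.amc', env.physics,
                              env.control_timestep())
# qpos is (nq, n_frames); qvel has one fewer frame, since velocities are
# finite differences between consecutive resampled positions.
print(converted.qpos.shape, converted.qvel.shape, converted.time.shape)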
68
Dreamer/local_dm_control_suite/utils/parse_amc_test.py
Executable file
@ -0,0 +1,68 @@
# Copyright 2017 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Tests for parse_amc utility."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os

# Internal dependencies.

from absl.testing import absltest
from local_dm_control_suite import humanoid_CMU
from dm_control.suite.utils import parse_amc

from dm_control.utils import io as resources

_TEST_AMC_PATH = resources.GetResourceFilename(
    os.path.join(os.path.dirname(__file__), '../demos/zeros.amc'))


class ParseAMCTest(absltest.TestCase):

  def test_sizes_of_parsed_data(self):

    # Instantiate the humanoid environment.
    env = humanoid_CMU.stand()

    # Parse and convert specified clip.
    converted = parse_amc.convert(
        _TEST_AMC_PATH, env.physics, env.control_timestep())

    self.assertEqual(converted.qpos.shape[0], 63)
    self.assertEqual(converted.qvel.shape[0], 62)
    self.assertEqual(converted.time.shape[0], converted.qpos.shape[1])
    self.assertEqual(converted.qpos.shape[1],
                     converted.qvel.shape[1] + 1)

    # Parse and convert specified clip -- WITH SMALLER TIMESTEP
    converted2 = parse_amc.convert(
        _TEST_AMC_PATH, env.physics, 0.5 * env.control_timestep())

    self.assertEqual(converted2.qpos.shape[0], 63)
    self.assertEqual(converted2.qvel.shape[0], 62)
    self.assertEqual(converted2.time.shape[0], converted2.qpos.shape[1])
    self.assertEqual(converted2.qpos.shape[1],
                     converted2.qvel.shape[1] + 1)

    # Compare sizes of parsed objects for different timesteps
    self.assertEqual(converted.qpos.shape[1] * 2, converted2.qpos.shape[1])


if __name__ == '__main__':
  absltest.main()
91
Dreamer/local_dm_control_suite/utils/randomizers.py
Executable file
@ -0,0 +1,91 @@
# Copyright 2017 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Randomization functions."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from dm_control.mujoco.wrapper import mjbindings
import numpy as np
from six.moves import range


def random_limited_quaternion(random, limit):
  """Generates a random quaternion limited to the specified rotations."""
  axis = random.randn(3)
  axis /= np.linalg.norm(axis)
  angle = random.rand() * limit

  quaternion = np.zeros(4)
  mjbindings.mjlib.mju_axisAngle2Quat(quaternion, axis, angle)

  return quaternion


def randomize_limited_and_rotational_joints(physics, random=None):
  """Randomizes the positions of joints defined in the physics body.

  The following randomization rules apply:
    - Bounded joints (hinges or sliders) are sampled uniformly in the bounds.
    - Unbounded hinges are sampled uniformly in [-pi, pi].
    - Quaternions for unlimited free joints and ball joints are sampled
      uniformly on the unit 3-sphere.
    - Quaternions for limited ball joints are sampled uniformly on a sector
      of the unit 3-sphere.
    - The linear degrees of freedom of free joints are not randomized.

  Args:
    physics: Instance of 'Physics' class that holds a loaded model.
    random: Optional instance of 'np.random.RandomState'. Defaults to the global
      NumPy random state.
  """
  random = random or np.random

  hinge = mjbindings.enums.mjtJoint.mjJNT_HINGE
  slide = mjbindings.enums.mjtJoint.mjJNT_SLIDE
  ball = mjbindings.enums.mjtJoint.mjJNT_BALL
  free = mjbindings.enums.mjtJoint.mjJNT_FREE

  qpos = physics.named.data.qpos

  for joint_id in range(physics.model.njnt):
    joint_name = physics.model.id2name(joint_id, 'joint')
    joint_type = physics.model.jnt_type[joint_id]
    is_limited = physics.model.jnt_limited[joint_id]
    range_min, range_max = physics.model.jnt_range[joint_id]

    if is_limited:
      if joint_type == hinge or joint_type == slide:
        qpos[joint_name] = random.uniform(range_min, range_max)

      elif joint_type == ball:
        qpos[joint_name] = random_limited_quaternion(random, range_max)

    else:
      if joint_type == hinge:
        qpos[joint_name] = random.uniform(-np.pi, np.pi)

      elif joint_type == ball:
        quat = random.randn(4)
        quat /= np.linalg.norm(quat)
        qpos[joint_name] = quat

      elif joint_type == free:
        quat = random.rand(4)
        quat /= np.linalg.norm(quat)
        qpos[joint_name][3:] = quat
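`random_limited_quaternion` delegates the axis-angle-to-quaternion conversion to MuJoCo. A pure-NumPy sketch of the same construction (an assumption about what `mju_axisAngle2Quat` computes, shown only to make the sampling explicit): a unit quaternion for a rotation by `angle` about `axis` is `[cos(angle/2), sin(angle/2) * axis]`, so the w-component keeps the rotation angle below `limit`.

import numpy as np

def random_limited_quaternion_np(random, limit):
    # Uniformly random axis; angle uniform in [0, limit).
    axis = random.randn(3)
    axis /= np.linalg.norm(axis)
    angle = random.rand() * limit
    # Equivalent of mju_axisAngle2Quat: [w, x, y, z] with w = cos(angle/2).
    return np.concatenate(([np.cos(angle / 2)], np.sin(angle / 2) * axis))

q = random_limited_quaternion_np(np.random.RandomState(0), np.pi / 3)
print(2 * np.arccos(q[0]) <= np.pi / 3)  # Rotation angle stays within limit.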
164
Dreamer/local_dm_control_suite/utils/randomizers_test.py
Executable file
@ -0,0 +1,164 @@
# Copyright 2017 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Tests for randomizers.py."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# Internal dependencies.
from absl.testing import absltest
from absl.testing import parameterized
from dm_control import mujoco
from dm_control.mujoco.wrapper import mjbindings
from dm_control.suite.utils import randomizers
import numpy as np
from six.moves import range

mjlib = mjbindings.mjlib


class RandomizeUnlimitedJointsTest(parameterized.TestCase):

  def setUp(self):
    self.rand = np.random.RandomState(100)

  def test_single_joint_of_each_type(self):
    physics = mujoco.Physics.from_xml_string("""<mujoco>
        <default>
          <joint range="0 90" />
        </default>
        <worldbody>
          <body>
            <geom type="box" size="1 1 1"/>
            <joint name="free" type="free"/>
          </body>
          <body>
            <geom type="box" size="1 1 1"/>
            <joint name="limited_hinge" type="hinge" limited="true"/>
            <joint name="slide" type="slide"/>
            <joint name="limited_slide" type="slide" limited="true"/>
            <joint name="hinge" type="hinge"/>
          </body>
          <body>
            <geom type="box" size="1 1 1"/>
            <joint name="ball" type="ball"/>
          </body>
          <body>
            <geom type="box" size="1 1 1"/>
            <joint name="limited_ball" type="ball" limited="true"/>
          </body>
        </worldbody>
      </mujoco>""")

    randomizers.randomize_limited_and_rotational_joints(physics, self.rand)
    self.assertNotEqual(0., physics.named.data.qpos['hinge'])
    self.assertNotEqual(0., physics.named.data.qpos['limited_hinge'])
    self.assertNotEqual(0., physics.named.data.qpos['limited_slide'])

    self.assertNotEqual(0., np.sum(physics.named.data.qpos['ball']))
    self.assertNotEqual(0., np.sum(physics.named.data.qpos['limited_ball']))

    self.assertNotEqual(0., np.sum(physics.named.data.qpos['free'][3:]))

    # Unlimited slide and the positional part of the free joint remains
    # uninitialized.
    self.assertEqual(0., physics.named.data.qpos['slide'])
    self.assertEqual(0., np.sum(physics.named.data.qpos['free'][:3]))

  def test_multiple_joints_of_same_type(self):
    physics = mujoco.Physics.from_xml_string("""<mujoco>
        <worldbody>
          <body>
            <geom type="box" size="1 1 1"/>
            <joint name="hinge_1" type="hinge"/>
            <joint name="hinge_2" type="hinge"/>
            <joint name="hinge_3" type="hinge"/>
          </body>
        </worldbody>
      </mujoco>""")

    randomizers.randomize_limited_and_rotational_joints(physics, self.rand)
    self.assertNotEqual(0., physics.named.data.qpos['hinge_1'])
    self.assertNotEqual(0., physics.named.data.qpos['hinge_2'])
    self.assertNotEqual(0., physics.named.data.qpos['hinge_3'])

    self.assertNotEqual(physics.named.data.qpos['hinge_1'],
                        physics.named.data.qpos['hinge_2'])

    self.assertNotEqual(physics.named.data.qpos['hinge_2'],
                        physics.named.data.qpos['hinge_3'])

    self.assertNotEqual(physics.named.data.qpos['hinge_1'],
                        physics.named.data.qpos['hinge_3'])

  def test_unlimited_hinge_randomization_range(self):
    physics = mujoco.Physics.from_xml_string("""<mujoco>
        <worldbody>
          <body>
            <geom type="box" size="1 1 1"/>
            <joint name="hinge" type="hinge"/>
          </body>
        </worldbody>
      </mujoco>""")

    for _ in range(10):
      randomizers.randomize_limited_and_rotational_joints(physics, self.rand)
      self.assertBetween(physics.named.data.qpos['hinge'], -np.pi, np.pi)

  def test_limited_1d_joint_limits_are_respected(self):
    physics = mujoco.Physics.from_xml_string("""<mujoco>
        <default>
          <joint limited="true"/>
        </default>
        <worldbody>
          <body>
            <geom type="box" size="1 1 1"/>
            <joint name="hinge" type="hinge" range="0 10"/>
            <joint name="slide" type="slide" range="30 50"/>
          </body>
        </worldbody>
      </mujoco>""")

    for _ in range(10):
      randomizers.randomize_limited_and_rotational_joints(physics, self.rand)
      self.assertBetween(physics.named.data.qpos['hinge'],
                         np.deg2rad(0), np.deg2rad(10))
      self.assertBetween(physics.named.data.qpos['slide'], 30, 50)

  def test_limited_ball_joint_are_respected(self):
    physics = mujoco.Physics.from_xml_string("""<mujoco>
        <worldbody>
          <body name="body" zaxis="1 0 0">
            <geom type="box" size="1 1 1"/>
            <joint name="ball" type="ball" limited="true" range="0 60"/>
          </body>
        </worldbody>
      </mujoco>""")

    body_axis = np.array([1., 0., 0.])
    joint_axis = np.zeros(3)
    for _ in range(10):
      randomizers.randomize_limited_and_rotational_joints(physics, self.rand)

      quat = physics.named.data.qpos['ball']
      mjlib.mju_rotVecQuat(joint_axis, body_axis, quat)
      angle_cos = np.dot(body_axis, joint_axis)
      self.assertGreater(angle_cos, 0.5)  # cos(60) = 0.5


if __name__ == '__main__':
  absltest.main()
158
Dreamer/local_dm_control_suite/walker.py
Executable file
@ -0,0 +1,158 @@
# Copyright 2017 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Planar Walker Domain."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections

from dm_control import mujoco
from dm_control.rl import control
from local_dm_control_suite import base
from local_dm_control_suite import common
from dm_control.suite.utils import randomizers
from dm_control.utils import containers
from dm_control.utils import rewards


_DEFAULT_TIME_LIMIT = 25
_CONTROL_TIMESTEP = .025

# Minimal height of torso over foot above which stand reward is 1.
_STAND_HEIGHT = 1.2

# Horizontal speeds (meters/second) above which move reward is 1.
_WALK_SPEED = 1
_RUN_SPEED = 8


SUITE = containers.TaggedTasks()


def get_model_and_assets():
  """Returns a tuple containing the model XML string and a dict of assets."""
  return common.read_model('walker.xml'), common.ASSETS


@SUITE.add('benchmarking')
def stand(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
  """Returns the Stand task."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = PlanarWalker(move_speed=0, random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
      **environment_kwargs)


@SUITE.add('benchmarking')
def walk(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
  """Returns the Walk task."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = PlanarWalker(move_speed=_WALK_SPEED, random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
      **environment_kwargs)


@SUITE.add('benchmarking')
def run(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
  """Returns the Run task."""
  physics = Physics.from_xml_string(*get_model_and_assets())
  task = PlanarWalker(move_speed=_RUN_SPEED, random=random)
  environment_kwargs = environment_kwargs or {}
  return control.Environment(
      physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
      **environment_kwargs)


class Physics(mujoco.Physics):
  """Physics simulation with additional features for the Walker domain."""

  def torso_upright(self):
    """Returns projection from z-axes of torso to the z-axes of world."""
    return self.named.data.xmat['torso', 'zz']

  def torso_height(self):
    """Returns the height of the torso."""
    return self.named.data.xpos['torso', 'z']

  def horizontal_velocity(self):
    """Returns the horizontal velocity of the center-of-mass."""
    return self.named.data.sensordata['torso_subtreelinvel'][0]

  def orientations(self):
    """Returns planar orientations of all bodies."""
    return self.named.data.xmat[1:, ['xx', 'xz']].ravel()


class PlanarWalker(base.Task):
  """A planar walker task."""

  def __init__(self, move_speed, random=None):
    """Initializes an instance of `PlanarWalker`.

    Args:
      move_speed: A float. If this value is zero, reward is given simply for
        standing up. Otherwise this specifies a target horizontal velocity for
        the walking task.
      random: Optional, either a `numpy.random.RandomState` instance, an
        integer seed for creating a new `RandomState`, or None to select a seed
        automatically (default).
    """
    self._move_speed = move_speed
    super(PlanarWalker, self).__init__(random=random)

  def initialize_episode(self, physics):
    """Sets the state of the environment at the start of each episode.

    In 'standing' mode, use initial orientation and small velocities.
    In 'random' mode, randomize joint angles and let fall to the floor.

    Args:
      physics: An instance of `Physics`.
    """
    randomizers.randomize_limited_and_rotational_joints(physics, self.random)
    super(PlanarWalker, self).initialize_episode(physics)

  def get_observation(self, physics):
    """Returns an observation of body orientations, height and velocities."""
    obs = collections.OrderedDict()
    obs['orientations'] = physics.orientations()
    obs['height'] = physics.torso_height()
    obs['velocity'] = physics.velocity()
    return obs

  def get_reward(self, physics):
    """Returns a reward to the agent."""
    standing = rewards.tolerance(physics.torso_height(),
                                 bounds=(_STAND_HEIGHT, float('inf')),
                                 margin=_STAND_HEIGHT/2)
    upright = (1 + physics.torso_upright()) / 2
    stand_reward = (3*standing + upright) / 4
    if self._move_speed == 0:
      return stand_reward
    else:
      move_reward = rewards.tolerance(physics.horizontal_velocity(),
                                      bounds=(self._move_speed, float('inf')),
                                      margin=self._move_speed/2,
                                      value_at_margin=0.5,
                                      sigmoid='linear')
      return stand_reward * (5*move_reward + 1) / 6
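The walk/run reward above is a product of a stand term and a blended move term, so the agent cannot trade standing for speed; even at zero speed the factor (5*move_reward + 1)/6 preserves one sixth of the stand reward. A minimal numeric sketch with hypothetical physics readings:

from dm_control.utils import rewards

torso_height, torso_upright, speed = 1.25, 0.95, 0.6  # Hypothetical values.
standing = rewards.tolerance(torso_height, bounds=(1.2, float('inf')),
                             margin=0.6)
stand_reward = (3 * standing + (1 + torso_upright) / 2) / 4
move_reward = rewards.tolerance(speed, bounds=(1.0, float('inf')),
                                margin=0.5, value_at_margin=0.5,
                                sigmoid='linear')
print(stand_reward * (5 * move_reward + 1) / 6)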
70
Dreamer/local_dm_control_suite/walker.xml
Executable file
@ -0,0 +1,70 @@
<mujoco model="planar walker">
  <include file="./common/visual.xml"/>
  <include file="./common/skybox.xml"/>
  <include file="./common/materials_white_floor.xml"/>

  <option timestep="0.0025"/>

  <statistic extent="2" center="0 0 1"/>

  <default>
    <joint damping=".1" armature="0.01" limited="true" solimplimit="0 .99 .01"/>
    <geom contype="1" conaffinity="0" friction=".7 .1 .1"/>
    <motor ctrlrange="-1 1" ctrllimited="true"/>
    <site size="0.01"/>
    <default class="walker">
      <geom material="self" type="capsule"/>
      <joint axis="0 -1 0"/>
    </default>
  </default>

  <worldbody>
    <geom name="floor" type="plane" conaffinity="1" pos="248 0 0" size="250 .8 .2" material="grid" zaxis="0 0 1"/>
    <body name="torso" pos="0 0 1.3" childclass="walker">
      <light name="light" pos="0 0 2" mode="trackcom"/>
      <camera name="side" pos="0 -2 .7" euler="60 0 0" mode="trackcom"/>
      <camera name="back" pos="-2 0 .5" xyaxes="0 -1 0 1 0 3" mode="trackcom"/>
      <joint name="rootz" axis="0 0 1" type="slide" limited="false" armature="0" damping="0"/>
      <joint name="rootx" axis="1 0 0" type="slide" limited="false" armature="0" damping="0"/>
      <joint name="rooty" axis="0 1 0" type="hinge" limited="false" armature="0" damping="0"/>
      <geom name="torso" size="0.07 0.3"/>
      <body name="right_thigh" pos="0 -.05 -0.3">
        <joint name="right_hip" range="-20 100"/>
        <geom name="right_thigh" pos="0 0 -0.225" size="0.05 0.225"/>
        <body name="right_leg" pos="0 0 -0.7">
          <joint name="right_knee" pos="0 0 0.25" range="-150 0"/>
          <geom name="right_leg" size="0.04 0.25"/>
          <body name="right_foot" pos="0.06 0 -0.25">
            <joint name="right_ankle" pos="-0.06 0 0" range="-45 45"/>
            <geom name="right_foot" zaxis="1 0 0" size="0.05 0.1"/>
          </body>
        </body>
      </body>
      <body name="left_thigh" pos="0 .05 -0.3">
        <joint name="left_hip" range="-20 100"/>
        <geom name="left_thigh" pos="0 0 -0.225" size="0.05 0.225"/>
        <body name="left_leg" pos="0 0 -0.7">
          <joint name="left_knee" pos="0 0 0.25" range="-150 0"/>
          <geom name="left_leg" size="0.04 0.25"/>
          <body name="left_foot" pos="0.06 0 -0.25">
            <joint name="left_ankle" pos="-0.06 0 0" range="-45 45"/>
            <geom name="left_foot" zaxis="1 0 0" size="0.05 0.1"/>
          </body>
        </body>
      </body>
    </body>
  </worldbody>

  <sensor>
    <subtreelinvel name="torso_subtreelinvel" body="torso"/>
  </sensor>

  <actuator>
    <motor name="right_hip" joint="right_hip" gear="100"/>
    <motor name="right_knee" joint="right_knee" gear="50"/>
    <motor name="right_ankle" joint="right_ankle" gear="20"/>
    <motor name="left_hip" joint="left_hip" gear="100"/>
    <motor name="left_knee" joint="left_knee" gear="50"/>
    <motor name="left_ankle" joint="left_ankle" gear="20"/>
  </actuator>
</mujoco>
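As a sketch of how such a model file can be inspected outside the suite (assumes dm_control is installed and the relative ./common/*.xml includes resolve against the current working directory):

from dm_control import mujoco

# Load the planar walker model directly and read off a named geom position.
physics = mujoco.Physics.from_xml_path('Dreamer/local_dm_control_suite/walker.xml')
print(physics.named.data.geom_xpos['torso'])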
16
Dreamer/local_dm_control_suite/wrappers/__init__.py
Executable file
@ -0,0 +1,16 @@
# Copyright 2018 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Environment wrappers used to extend or modify environment behaviour."""
74
Dreamer/local_dm_control_suite/wrappers/action_noise.py
Executable file
@ -0,0 +1,74 @@
# Copyright 2018 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Wrapper for control suite environments that adds Gaussian noise to actions."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import dm_env
import numpy as np


_BOUNDS_MUST_BE_FINITE = (
    'All bounds in `env.action_spec()` must be finite, got: {action_spec}')


class Wrapper(dm_env.Environment):
  """Wraps a control environment and adds Gaussian noise to actions."""

  def __init__(self, env, scale=0.01):
    """Initializes a new action noise Wrapper.

    Args:
      env: The control suite environment to wrap.
      scale: The standard deviation of the noise, expressed as a fraction
        of the max-min range for each action dimension.

    Raises:
      ValueError: If any of the action dimensions of the wrapped environment
        are unbounded.
    """
    action_spec = env.action_spec()
    if not (np.all(np.isfinite(action_spec.minimum)) and
            np.all(np.isfinite(action_spec.maximum))):
      raise ValueError(_BOUNDS_MUST_BE_FINITE.format(action_spec=action_spec))
    self._minimum = action_spec.minimum
    self._maximum = action_spec.maximum
    self._noise_std = scale * (action_spec.maximum - action_spec.minimum)
    self._env = env

  def step(self, action):
    noisy_action = action + self._env.task.random.normal(scale=self._noise_std)
    # Clip the noisy actions in place so that they fall within the bounds
    # specified by the `action_spec`. Note that MuJoCo implicitly clips out-of-
    # bounds control inputs, but we also clip here in case the actions do not
    # correspond directly to MuJoCo actuators, or if there are other wrapper
    # layers that expect the actions to be within bounds.
    np.clip(noisy_action, self._minimum, self._maximum, out=noisy_action)
    return self._env.step(noisy_action)

  def reset(self):
    return self._env.reset()

  def observation_spec(self):
    return self._env.observation_spec()

  def action_spec(self):
    return self._env.action_spec()

  def __getattr__(self, name):
    return getattr(self._env, name)
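A minimal usage sketch (assumption: the local suite exposes task constructors the way dm_control's does, compare cartpole.swingup in the pixels tests below):

from local_dm_control_suite import walker

# Wrap a suite task so every action gets zero-mean Gaussian noise with
# std equal to 1% of each action dimension's range.
env = walker.walk()
env = Wrapper(env, scale=0.01)
time_step = env.reset()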
136
Dreamer/local_dm_control_suite/wrappers/action_noise_test.py
Executable file
@ -0,0 +1,136 @@
# Copyright 2018 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Tests for the action noise wrapper."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# Internal dependencies.
from absl.testing import absltest
from absl.testing import parameterized
from dm_control.rl import control
from dm_control.suite.wrappers import action_noise
from dm_env import specs
import mock
import numpy as np


class ActionNoiseTest(parameterized.TestCase):

  def make_action_spec(self, lower=(-1.,), upper=(1.,)):
    lower, upper = np.broadcast_arrays(lower, upper)
    return specs.BoundedArray(
        shape=lower.shape, dtype=float, minimum=lower, maximum=upper)

  def make_mock_env(self, action_spec=None):
    action_spec = action_spec or self.make_action_spec()
    env = mock.Mock(spec=control.Environment)
    env.action_spec.return_value = action_spec
    return env

  def assertStepCalledOnceWithCorrectAction(self, env, expected_action):
    # NB: `assert_called_once_with()` doesn't support numpy arrays.
    env.step.assert_called_once()
    actual_action = env.step.call_args_list[0][0][0]
    np.testing.assert_array_equal(expected_action, actual_action)

  @parameterized.parameters([
      dict(lower=np.r_[-1., 0.], upper=np.r_[1., 2.], scale=0.05),
      dict(lower=np.r_[-1., 0.], upper=np.r_[1., 2.], scale=0.),
      dict(lower=np.r_[-1., 0.], upper=np.r_[-1., 0.], scale=0.05),
  ])
  def test_step(self, lower, upper, scale):
    seed = 0
    std = scale * (upper - lower)
    expected_noise = np.random.RandomState(seed).normal(scale=std)
    action = np.random.RandomState(seed).uniform(lower, upper)
    expected_noisy_action = np.clip(action + expected_noise, lower, upper)
    task = mock.Mock(spec=control.Task)
    task.random = np.random.RandomState(seed)
    action_spec = self.make_action_spec(lower=lower, upper=upper)
    env = self.make_mock_env(action_spec=action_spec)
    env.task = task
    wrapped_env = action_noise.Wrapper(env, scale=scale)
    time_step = wrapped_env.step(action)
    self.assertStepCalledOnceWithCorrectAction(env, expected_noisy_action)
    self.assertIs(time_step, env.step(expected_noisy_action))

  @parameterized.named_parameters([
      dict(testcase_name='within_bounds', action=np.r_[-1.], noise=np.r_[0.1]),
      dict(testcase_name='below_lower', action=np.r_[-1.], noise=np.r_[-0.1]),
      dict(testcase_name='above_upper', action=np.r_[1.], noise=np.r_[0.1]),
  ])
  def test_action_clipping(self, action, noise):
    lower = -1.
    upper = 1.
    expected_noisy_action = np.clip(action + noise, lower, upper)
    task = mock.Mock(spec=control.Task)
    task.random = mock.Mock(spec=np.random.RandomState)
    task.random.normal.return_value = noise
    action_spec = self.make_action_spec(lower=lower, upper=upper)
    env = self.make_mock_env(action_spec=action_spec)
    env.task = task
    wrapped_env = action_noise.Wrapper(env)
    time_step = wrapped_env.step(action)
    self.assertStepCalledOnceWithCorrectAction(env, expected_noisy_action)
    self.assertIs(time_step, env.step(expected_noisy_action))

  @parameterized.parameters([
      dict(lower=np.r_[-1., 0.], upper=np.r_[1., np.inf]),
      dict(lower=np.r_[np.nan, 0.], upper=np.r_[1., 2.]),
  ])
  def test_error_if_action_bounds_non_finite(self, lower, upper):
    action_spec = self.make_action_spec(lower=lower, upper=upper)
    env = self.make_mock_env(action_spec=action_spec)
    with self.assertRaisesWithLiteralMatch(
        ValueError,
        action_noise._BOUNDS_MUST_BE_FINITE.format(action_spec=action_spec)):
      _ = action_noise.Wrapper(env)

  def test_reset(self):
    env = self.make_mock_env()
    wrapped_env = action_noise.Wrapper(env)
    time_step = wrapped_env.reset()
    env.reset.assert_called_once_with()
    self.assertIs(time_step, env.reset())

  def test_observation_spec(self):
    env = self.make_mock_env()
    wrapped_env = action_noise.Wrapper(env)
    observation_spec = wrapped_env.observation_spec()
    env.observation_spec.assert_called_once_with()
    self.assertIs(observation_spec, env.observation_spec())

  def test_action_spec(self):
    env = self.make_mock_env()
    wrapped_env = action_noise.Wrapper(env)
    # `env.action_spec()` is called in `Wrapper.__init__()`
    env.action_spec.reset_mock()
    action_spec = wrapped_env.action_spec()
    env.action_spec.assert_called_once_with()
    self.assertIs(action_spec, env.action_spec())

  @parameterized.parameters(['task', 'physics', 'control_timestep'])
  def test_getattr(self, attribute_name):
    env = self.make_mock_env()
    wrapped_env = action_noise.Wrapper(env)
    attr = getattr(wrapped_env, attribute_name)
    self.assertIs(attr, getattr(env, attribute_name))


if __name__ == '__main__':
  absltest.main()
120
Dreamer/local_dm_control_suite/wrappers/pixels.py
Executable file
@ -0,0 +1,120 @@
# Copyright 2017 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Wrapper that adds pixel observations to a control environment."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections

import dm_env
from dm_env import specs

STATE_KEY = 'state'


class Wrapper(dm_env.Environment):
  """Wraps a control environment and adds a rendered pixel observation."""

  def __init__(self, env, pixels_only=True, render_kwargs=None,
               observation_key='pixels'):
    """Initializes a new pixel Wrapper.

    Args:
      env: The environment to wrap.
      pixels_only: If True (default), the original set of 'state' observations
        returned by the wrapped environment will be discarded, and the
        `OrderedDict` of observations will only contain pixels. If False, the
        `OrderedDict` will contain the original observations as well as the
        pixel observations.
      render_kwargs: Optional `dict` containing keyword arguments passed to
        the `mujoco.Physics.render` method.
      observation_key: Optional custom string specifying the pixel
        observation's key in the `OrderedDict` of observations. Defaults to
        'pixels'.

    Raises:
      ValueError: If `env`'s observation spec is not compatible with the
        wrapper. Supported formats are a single array, or a dict of arrays.
      ValueError: If `env`'s observation already contains the specified
        `observation_key`.
    """
    if render_kwargs is None:
      render_kwargs = {}

    wrapped_observation_spec = env.observation_spec()

    if isinstance(wrapped_observation_spec, specs.Array):
      self._observation_is_dict = False
      invalid_keys = set([STATE_KEY])
    elif isinstance(wrapped_observation_spec, collections.MutableMapping):
      self._observation_is_dict = True
      invalid_keys = set(wrapped_observation_spec.keys())
    else:
      raise ValueError('Unsupported observation spec structure.')

    if not pixels_only and observation_key in invalid_keys:
      raise ValueError('Duplicate or reserved observation key {!r}.'
                       .format(observation_key))

    if pixels_only:
      self._observation_spec = collections.OrderedDict()
    elif self._observation_is_dict:
      self._observation_spec = wrapped_observation_spec.copy()
    else:
      self._observation_spec = collections.OrderedDict()
      self._observation_spec[STATE_KEY] = wrapped_observation_spec

    # Extend observation spec.
    pixels = env.physics.render(**render_kwargs)
    pixels_spec = specs.Array(
        shape=pixels.shape, dtype=pixels.dtype, name=observation_key)
    self._observation_spec[observation_key] = pixels_spec

    self._env = env
    self._pixels_only = pixels_only
    self._render_kwargs = render_kwargs
    self._observation_key = observation_key

  def reset(self):
    time_step = self._env.reset()
    return self._add_pixel_observation(time_step)

  def step(self, action):
    time_step = self._env.step(action)
    return self._add_pixel_observation(time_step)

  def observation_spec(self):
    return self._observation_spec

  def action_spec(self):
    return self._env.action_spec()

  def _add_pixel_observation(self, time_step):
    if self._pixels_only:
      observation = collections.OrderedDict()
    elif self._observation_is_dict:
      observation = type(time_step.observation)(time_step.observation)
    else:
      observation = collections.OrderedDict()
      observation[STATE_KEY] = time_step.observation

    pixels = self._env.physics.render(**self._render_kwargs)
    observation[self._observation_key] = pixels
    return time_step._replace(observation=observation)

  def __getattr__(self, name):
    return getattr(self._env, name)
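A minimal usage sketch (assumption: the local suite's cartpole module builds tasks the way dm_control's does, as the tests below exercise):

from local_dm_control_suite import cartpole

# Keep the state observations and add an 84x84 render under 'pixels'.
env = cartpole.swingup()
env = Wrapper(env, pixels_only=False,
              render_kwargs={'width': 84, 'height': 84, 'camera_id': 0})
time_step = env.reset()
print(time_step.observation['pixels'].shape)  # (84, 84, 3)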
133
Dreamer/local_dm_control_suite/wrappers/pixels_test.py
Executable file
@ -0,0 +1,133 @@
# Copyright 2017 The dm_control Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Tests for the pixel wrapper."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections

# Internal dependencies.
from absl.testing import absltest
from absl.testing import parameterized
from local_dm_control_suite import cartpole
from dm_control.suite.wrappers import pixels
import dm_env
from dm_env import specs

import numpy as np


class FakePhysics(object):

  def render(self, *args, **kwargs):
    del args
    del kwargs
    return np.zeros((4, 5, 3), dtype=np.uint8)


class FakeArrayObservationEnvironment(dm_env.Environment):

  def __init__(self):
    self.physics = FakePhysics()

  def reset(self):
    return dm_env.restart(np.zeros((2,)))

  def step(self, action):
    del action
    return dm_env.transition(0.0, np.zeros((2,)))

  def action_spec(self):
    pass

  def observation_spec(self):
    # `float` rather than the deprecated `np.float` alias.
    return specs.Array(shape=(2,), dtype=float)


class PixelsTest(parameterized.TestCase):

  @parameterized.parameters(True, False)
  def test_dict_observation(self, pixels_only):
    pixel_key = 'rgb'

    env = cartpole.swingup()

    # Make sure we are testing the right environment for the test.
    observation_spec = env.observation_spec()
    self.assertIsInstance(observation_spec, collections.OrderedDict)

    width = 320
    height = 240

    # The wrapper should only add one observation.
    wrapped = pixels.Wrapper(env,
                             observation_key=pixel_key,
                             pixels_only=pixels_only,
                             render_kwargs={'width': width, 'height': height})

    wrapped_observation_spec = wrapped.observation_spec()
    self.assertIsInstance(wrapped_observation_spec, collections.OrderedDict)

    if pixels_only:
      self.assertLen(wrapped_observation_spec, 1)
      self.assertEqual([pixel_key], list(wrapped_observation_spec.keys()))
    else:
      expected_length = len(observation_spec) + 1
      self.assertLen(wrapped_observation_spec, expected_length)
      expected_keys = list(observation_spec.keys()) + [pixel_key]
      self.assertEqual(expected_keys, list(wrapped_observation_spec.keys()))

    # Check that the added spec item is consistent with the added observation.
    time_step = wrapped.reset()
    rgb_observation = time_step.observation[pixel_key]
    wrapped_observation_spec[pixel_key].validate(rgb_observation)

    self.assertEqual(rgb_observation.shape, (height, width, 3))
    self.assertEqual(rgb_observation.dtype, np.uint8)

  @parameterized.parameters(True, False)
  def test_single_array_observation(self, pixels_only):
    pixel_key = 'depth'

    env = FakeArrayObservationEnvironment()
    observation_spec = env.observation_spec()
    self.assertIsInstance(observation_spec, specs.Array)

    wrapped = pixels.Wrapper(env, observation_key=pixel_key,
                             pixels_only=pixels_only)
    wrapped_observation_spec = wrapped.observation_spec()
    self.assertIsInstance(wrapped_observation_spec, collections.OrderedDict)

    if pixels_only:
      self.assertLen(wrapped_observation_spec, 1)
      self.assertEqual([pixel_key], list(wrapped_observation_spec.keys()))
    else:
      self.assertLen(wrapped_observation_spec, 2)
      self.assertEqual([pixels.STATE_KEY, pixel_key],
                       list(wrapped_observation_spec.keys()))

    time_step = wrapped.reset()

    depth_observation = time_step.observation[pixel_key]
    wrapped_observation_spec[pixel_key].validate(depth_observation)

    self.assertEqual(depth_observation.shape, (4, 5, 3))
    self.assertEqual(depth_observation.dtype, np.uint8)


if __name__ == '__main__':
  absltest.main()
297
Dreamer/models.py
Normal file
@ -0,0 +1,297 @@
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers as tfkl
from tensorflow_probability import distributions as tfd
from tensorflow.keras.mixed_precision import experimental as prec

import tools


class RSSM(tools.Module):

  def __init__(self, stoch=30, deter=200, hidden=200, act=tf.nn.elu):
    super().__init__()
    self._activation = act
    self._stoch_size = stoch
    self._deter_size = deter
    self._hidden_size = hidden
    self._cell = tfkl.GRUCell(self._deter_size)

  def initial(self, batch_size):
    dtype = prec.global_policy().compute_dtype
    return dict(
        mean=tf.zeros([batch_size, self._stoch_size], dtype),
        std=tf.zeros([batch_size, self._stoch_size], dtype),
        stoch=tf.zeros([batch_size, self._stoch_size], dtype),
        deter=self._cell.get_initial_state(None, batch_size, dtype))

  @tf.function
  def observe(self, embed, action, state=None):
    if state is None:
      state = self.initial(tf.shape(action)[0])
    embed = tf.transpose(embed, [1, 0, 2])
    action = tf.transpose(action, [1, 0, 2])
    post, prior = tools.static_scan(
        lambda prev, inputs: self.obs_step(prev[0], *inputs),
        (action, embed), (state, state))
    post = {k: tf.transpose(v, [1, 0, 2]) for k, v in post.items()}
    prior = {k: tf.transpose(v, [1, 0, 2]) for k, v in prior.items()}
    return post, prior

  @tf.function
  def imagine(self, action, state=None):
    if state is None:
      state = self.initial(tf.shape(action)[0])
    assert isinstance(state, dict), state
    action = tf.transpose(action, [1, 0, 2])
    prior = tools.static_scan(self.img_step, action, state)
    prior = {k: tf.transpose(v, [1, 0, 2]) for k, v in prior.items()}
    return prior

  def get_feat(self, state):
    return tf.concat([state['stoch'], state['deter']], -1)

  def get_dist(self, state):
    return tfd.MultivariateNormalDiag(state['mean'], state['std'])

  @tf.function
  def obs_step(self, prev_state, prev_action, embed):
    prior = self.img_step(prev_state, prev_action)
    x = tf.concat([prior['deter'], embed], -1)
    x = self.get('obs1', tfkl.Dense, self._hidden_size, self._activation)(x)
    x = self.get('obs2', tfkl.Dense, 2 * self._stoch_size, None)(x)
    mean, std = tf.split(x, 2, -1)
    std = tf.nn.softplus(std) + 0.1
    stoch = self.get_dist({'mean': mean, 'std': std}).sample()
    post = {'mean': mean, 'std': std,
            'stoch': stoch, 'deter': prior['deter']}
    return post, prior

  @tf.function
  def img_step(self, prev_state, prev_action):
    x = tf.concat([prev_state['stoch'], prev_action], -1)
    x = self.get('img1', tfkl.Dense, self._hidden_size, self._activation)(x)
    x, deter = self._cell(x, [prev_state['deter']])
    deter = deter[0]  # Keras wraps the state in a list.
    x = self.get('img2', tfkl.Dense, self._hidden_size, self._activation)(x)
    x = self.get('img3', tfkl.Dense, 2 * self._stoch_size, None)(x)
    mean, std = tf.split(x, 2, -1)
    std = tf.nn.softplus(std) + 0.1
    stoch = self.get_dist({'mean': mean, 'std': std}).sample()
    prior = {'mean': mean, 'std': std, 'stoch': stoch, 'deter': deter}
    return prior
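To make the tensor layout concrete, a shape-only sketch of a posterior rollout (batch, sequence, and feature sizes are arbitrary; assumes this module's `tools.static_scan` is importable):

# Shape sketch: 16 sequences of 50 steps, 1024-dim encoder embeddings,
# 6-dim actions. observe() scans over time internally (time-major) and
# transposes the results back to batch-major.
rssm = RSSM(stoch=30, deter=200)
embed = tf.zeros([16, 50, 1024])
action = tf.zeros([16, 50, 6])
post, prior = rssm.observe(embed, action)
# post['stoch']: [16, 50, 30], post['deter']: [16, 50, 200]
feat = rssm.get_feat(post)  # [16, 50, 230], the input to the decoders below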

class ConvEncoder(tools.Module):

  def __init__(self, depth=32, act=tf.nn.relu, image_size=64):
    self._act = act
    self._depth = depth
    self._image_size = image_size

    if image_size == 64:
      self._outdim = 32 * self._depth
      self._kernel_sizes = [4, 4, 4, 4]
    elif image_size == 32:
      self._outdim = 8 * self._depth
      self._kernel_sizes = [3, 3, 3, 3]
    elif image_size == 84:
      self._outdim = 72 * self._depth
      self._kernel_sizes = [4, 4, 4, 4]
    else:
      raise NotImplementedError

  def __call__(self, obs):
    kwargs = dict(strides=2, activation=self._act)
    x = tf.reshape(obs['image'], (-1,) + tuple(obs['image'].shape[-3:]))
    x = self.get('h1', tfkl.Conv2D, 1 * self._depth,
                 self._kernel_sizes[0], **kwargs)(x)
    x = self.get('h2', tfkl.Conv2D, 2 * self._depth,
                 self._kernel_sizes[1], **kwargs)(x)
    x = self.get('h3', tfkl.Conv2D, 4 * self._depth,
                 self._kernel_sizes[2], **kwargs)(x)
    x = self.get('h4', tfkl.Conv2D, 8 * self._depth,
                 self._kernel_sizes[3], **kwargs)(x)
    shape = tf.concat([tf.shape(obs['image'])[:-3], [self._outdim]], 0)
    return tf.reshape(x, shape)
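The `_outdim` branches above encode the spatial size left after four stride-2 valid convolutions, floor((n - k) / 2) + 1 per layer; a quick check of the 84-pixel branch (illustration only):

# 84 -> 41 -> 19 -> 8 -> 3 with kernel 4, stride 2, so the final feature
# map is 3 x 3 x (8 * depth) = 72 * depth, matching self._outdim above.
def conv_out(n, k=4, s=2):
  return (n - k) // s + 1

n = 84
for expected in [41, 19, 8, 3]:
  n = conv_out(n)
  assert n == expected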

class ConvDecoder(tools.Module):

  def __init__(self, depth=32, act=tf.nn.relu, shape=(64, 64, 3)):
    self._act = act
    self._depth = depth
    self._shape = shape

    if shape[0] == 64:
      self._outdim = 32 * self._depth
      self._kernel_sizes = [5, 5, 6, 6]
    elif shape[0] == 32:
      self._outdim = 8 * self._depth
      self._kernel_sizes = [3, 3, 3, 4]
    elif shape[0] == 84:
      self._outdim = 72 * self._depth
      self._kernel_sizes = [7, 6, 6, 6]
    else:
      raise NotImplementedError

  def __call__(self, features):
    kwargs = dict(strides=2, activation=self._act)
    x = self.get('h1', tfkl.Dense, self._outdim, None)(features)
    x = tf.reshape(x, [-1, 1, 1, self._outdim])
    x = self.get('h2', tfkl.Conv2DTranspose,
                 4 * self._depth, self._kernel_sizes[0], **kwargs)(x)
    x = self.get('h3', tfkl.Conv2DTranspose,
                 2 * self._depth, self._kernel_sizes[1], **kwargs)(x)
    x = self.get('h4', tfkl.Conv2DTranspose,
                 1 * self._depth, self._kernel_sizes[2], **kwargs)(x)
    x = self.get('h5', tfkl.Conv2DTranspose,
                 self._shape[-1], self._kernel_sizes[3], strides=2)(x)
    mean = tf.reshape(x, tf.concat(
        [tf.shape(features)[:-1], self._shape], 0))
    return tfd.Independent(tfd.Normal(mean, 1), len(self._shape))


class ConvDecoderMask(tools.Module):

  def __init__(self, depth=32, act=tf.nn.relu, shape=(64, 64, 3)):
    self._act = act
    self._depth = depth
    self._shape = shape

    if shape[0] == 64:
      self._outdim = 32 * self._depth
      self._kernel_sizes = [5, 5, 6, 6]
    elif shape[0] == 32:
      self._outdim = 8 * self._depth
      self._kernel_sizes = [3, 3, 3, 4]
    elif shape[0] == 84:
      self._outdim = 72 * self._depth
      self._kernel_sizes = [7, 6, 6, 6]
    else:
      raise NotImplementedError

  def __call__(self, features):
    kwargs = dict(strides=2, activation=self._act)
    x = self.get('h1', tfkl.Dense, self._outdim, None)(features)
    x = tf.reshape(x, [-1, 1, 1, self._outdim])
    x = self.get('h2', tfkl.Conv2DTranspose,
                 4 * self._depth, self._kernel_sizes[0], **kwargs)(x)
    x = self.get('h3', tfkl.Conv2DTranspose,
                 2 * self._depth, self._kernel_sizes[1], **kwargs)(x)
    x = self.get('h4', tfkl.Conv2DTranspose,
                 1 * self._depth, self._kernel_sizes[2], **kwargs)(x)
    x = self.get('h5', tfkl.Conv2DTranspose,
                 3 + self._shape[-1], self._kernel_sizes[3], strides=2)(x)
    mean, mask = tf.split(x, [3, 3], -1)
    mean = tf.reshape(mean, tf.concat(
        [tf.shape(features)[:-1], self._shape], 0))
    mask = tf.reshape(mask, tf.concat(
        [tf.shape(features)[:-1], self._shape], 0))
    return tfd.Independent(tfd.Normal(mean, 1), len(self._shape)), mask


class ConvDecoderMaskEnsemble(tools.Module):
  """Ensembles two ConvDecoderMask heads with <Normal, mask> outputs,
  blending their means with a learned pixel-wise mask."""

  def __init__(self, decoder1, decoder2, precision):
    self._decoder1 = decoder1
    self._decoder2 = decoder2
    self._precision = 'float' + str(precision)
    self._shape = decoder1._shape

  def __call__(self, feat1, feat2):
    kwargs = dict(strides=1, activation=tf.nn.sigmoid)
    pred1, mask1 = self._decoder1(feat1)
    pred2, mask2 = self._decoder2(feat2)
    mean1 = pred1.submodules[0].loc
    mean2 = pred2.submodules[0].loc
    mask_feat = tf.concat([mask1, mask2], -1)
    mask = self.get('mask1', tfkl.Conv2D, 1, 1, **kwargs)(mask_feat)
    mask_use1 = mask
    mask_use2 = 1 - mask
    mean = mean1 * tf.cast(mask_use1, self._precision) + \
        mean2 * tf.cast(mask_use2, self._precision)
    return tfd.Independent(tfd.Normal(mean, 1), len(self._shape)), pred1, pred2, tf.cast(mask_use1, self._precision)


class InverseDecoder(tools.Module):

  def __init__(self, shape, layers, units, act=tf.nn.elu):
    self._shape = shape
    self._layers = layers
    self._units = units
    self._act = act

  def __call__(self, features):
    x = tf.concat([features[:, :-1], features[:, 1:]], -1)
    for index in range(self._layers):
      x = self.get(f'h{index}', tfkl.Dense, self._units, self._act)(x)
    x = self.get('hout', tfkl.Dense, np.prod(self._shape))(x)
    return tfd.Independent(tfd.Normal(x, 1), 1)


class DenseDecoder(tools.Module):

  def __init__(self, shape, layers, units, dist='normal', act=tf.nn.elu):
    self._shape = shape
    self._layers = layers
    self._units = units
    self._dist = dist
    self._act = act

  def __call__(self, features):
    x = features
    for index in range(self._layers):
      x = self.get(f'h{index}', tfkl.Dense, self._units, self._act)(x)
    x = self.get('hout', tfkl.Dense, np.prod(self._shape))(x)
    x = tf.reshape(x, tf.concat([tf.shape(features)[:-1], self._shape], 0))
    if self._dist == 'normal':
      return tfd.Independent(tfd.Normal(x, 1), len(self._shape))
    if self._dist == 'binary':
      return tfd.Independent(tfd.Bernoulli(x), len(self._shape))
    raise NotImplementedError(self._dist)


class ActionDecoder(tools.Module):

  def __init__(
      self, size, layers, units, dist='tanh_normal', act=tf.nn.elu,
      min_std=1e-4, init_std=5, mean_scale=5):
    self._size = size
    self._layers = layers
    self._units = units
    self._dist = dist
    self._act = act
    self._min_std = min_std
    self._init_std = init_std
    self._mean_scale = mean_scale

  def __call__(self, features):
    raw_init_std = np.log(np.exp(self._init_std) - 1)
    x = features
    for index in range(self._layers):
      x = self.get(f'h{index}', tfkl.Dense, self._units, self._act)(x)
    if self._dist == 'tanh_normal':
      # https://www.desmos.com/calculator/rcmcf5jwe7
      x = self.get('hout', tfkl.Dense, 2 * self._size)(x)
      mean, std = tf.split(x, 2, -1)
      mean = self._mean_scale * tf.tanh(mean / self._mean_scale)
      std = tf.nn.softplus(std + raw_init_std) + self._min_std
      dist = tfd.Normal(mean, std)
      dist = tfd.TransformedDistribution(dist, tools.TanhBijector())
      dist = tfd.Independent(dist, 1)
      dist = tools.SampleDist(dist)
    elif self._dist == 'onehot':
      x = self.get('hout', tfkl.Dense, self._size)(x)
      dist = tools.OneHotDist(x)
    else:
      raise NotImplementedError(self._dist)
    return dist
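A sketch of how the `tanh_normal` head is typically consumed (feature size is arbitrary; `tools.SampleDist` wraps the squashed Gaussian so its mode and entropy are estimated by sampling):

# Hypothetical usage: 6-dim actions from a 230-dim latent feature.
actor = ActionDecoder(size=6, layers=4, units=400, dist='tanh_normal')
feat = tf.zeros([16, 230])
dist = actor(feat)
action = dist.sample()  # squashed into (-1, 1) by the tanh bijector, shape [16, 6]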
121
Dreamer/run.py
Normal file
@ -0,0 +1,121 @@
import wrappers
import tools
import tensorflow as tf
import argparse
import functools
import yaml
import os
import pathlib
import sys

from tensorflow.keras.mixed_precision import experimental as prec
from dreamers import Dreamer, SeparationDreamer, InverseDreamer
from env_tools import count_steps, make_env

METHOD2DREAMER = {
    'dreamer': Dreamer,
    'tia': SeparationDreamer,
    'inverse': InverseDreamer
}

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['MUJOCO_GL'] = 'egl'
tf.get_logger().setLevel('ERROR')
sys.path.append(str(pathlib.Path(__file__).parent))


def main(method, config):

  # 'tia' is the separation method; its logdir records the disen scales.
  # (The argparse choices below never produce 'separation', so testing for
  # it here would leave this branch dead.)
  if method == 'tia':
    config.logdir = os.path.join(
        config.logdir, config.task,
        'separation' + '_' + str(config.disen_neg_rew_scale) +
        '_' + str(config.disen_rec_scale),
        str(config.seed))
  else:
    config.logdir = os.path.join(
        config.logdir, config.task,
        method,
        str(config.seed))

  logdir = pathlib.Path(config.logdir)
  logdir.mkdir(parents=True, exist_ok=True)
  snapshot_dir = os.path.join(config.logdir, 'snapshots')
  snapshot_dir = pathlib.Path(snapshot_dir)
  snapshot_dir.mkdir(parents=True, exist_ok=True)
  with open(os.path.join(config.logdir, 'config.yaml'), 'w') as f:
    yaml.dump(vars(config), f, sort_keys=False)

  if config.gpu_growth:
    for gpu in tf.config.experimental.list_physical_devices('GPU'):
      tf.config.experimental.set_memory_growth(gpu, True)
  assert config.precision in (16, 32), config.precision
  if config.precision == 16:
    prec.set_policy(prec.Policy('mixed_float16'))
  config.steps = int(config.steps)
  config.logdir = logdir
  print('Logdir', config.logdir)

  # Create environments.
  datadir = config.logdir / 'episodes'
  writer = tf.summary.create_file_writer(
      str(config.logdir), max_queue=1000, flush_millis=20000)
  writer.set_as_default()
  train_envs = [wrappers.Async(lambda: make_env(
      config, writer, 'train', datadir, config.video_dir, store=True), config.parallel)
      for _ in range(config.envs)]
  test_envs = [wrappers.Async(lambda: make_env(
      config, writer, 'test', datadir, config.video_dir, store=False), config.parallel)
      for _ in range(config.envs)]
  actspace = train_envs[0].action_space

  # Prefill dataset with random episodes.
  step = count_steps(datadir, config)
  prefill = max(0, config.prefill - step)
  print(f'Prefill dataset with {prefill} steps.')
  def random_agent(o, d, _):
    return ([actspace.sample() for _ in d], None)
  tools.simulate(random_agent, train_envs, prefill / config.action_repeat)
  writer.flush()

  # Train and regularly evaluate the agent.
  step = count_steps(datadir, config)
  print(f'Simulating agent for {config.steps-step} steps.')
  DreamerModel = METHOD2DREAMER[method]
  agent = DreamerModel(config, datadir, actspace, writer)
  if (config.logdir / 'variables.pkl').exists():
    print('Load checkpoint.')
    agent.load(config.logdir / 'variables.pkl')
  state = None
  should_snapshot = tools.Every(config.snapshot_every)
  while step < config.steps:
    print('Start evaluation.')
    tools.simulate(
        functools.partial(agent, training=False), test_envs, episodes=1)
    writer.flush()
    print('Start collection.')
    steps = config.eval_every // config.action_repeat
    state = tools.simulate(agent, train_envs, steps, state=state)
    step = count_steps(datadir, config)
    agent.save(config.logdir / 'variables.pkl')
    if should_snapshot(step):
      agent.save(snapshot_dir / ('variables_' + str(step) + '.pkl'))
  for env in train_envs + test_envs:
    env.close()


if __name__ == '__main__':
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--method', type=str, choices=['dreamer', 'inverse', 'tia'], required=True)
  parser.add_argument('--configs', nargs='+', required=True)
  args, remaining = parser.parse_known_args()
  config_path = 'train_configs/' + args.method + '.yaml'
  configs = yaml.safe_load(
      (pathlib.Path(__file__).parent / config_path).read_text())
  config_ = {}
  for name in args.configs:
    config_.update(configs[name])
  parser = argparse.ArgumentParser()
  for key, value in config_.items():
    parser.add_argument(
        f'--{key}', type=tools.args_type(value), default=value)
  main(args.method, parser.parse_args(remaining))
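For reference, a typical invocation would look like the following (a sketch: `--configs` names must match top-level groups in the chosen YAML, such as `dmc` in train_configs/dreamer.yaml below, and any remaining flags override individual keys from that group):

python run.py --method dreamer --configs dmc --task dmc_cheetah_run_driving --seed 0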
467
Dreamer/tools.py
Normal file
@ -0,0 +1,467 @@
import datetime
import io
import pathlib
import pickle
import re
import uuid

import gym
import numpy as np
import tensorflow as tf
import tensorflow.compat.v1 as tf1  # pylint: disable=E
import tensorflow_probability as tfp
from tensorflow.keras.mixed_precision import experimental as prec
from tensorflow_probability import distributions as tfd


class AttrDict(dict):

  __setattr__ = dict.__setitem__
  __getattr__ = dict.__getitem__

  def from_dict(self, src_dict):
    for key in src_dict:
      setattr(self, key, src_dict[key])


class Module(tf.Module):

  def save(self, filename):
    values = tf.nest.map_structure(lambda x: x.numpy(), self.variables)
    with pathlib.Path(filename).open('wb') as f:
      pickle.dump(values, f)

  def load(self, filename):
    with pathlib.Path(filename).open('rb') as f:
      values = pickle.load(f)
    tf.nest.map_structure(lambda x, y: x.assign(y), self.variables, values)

  def get(self, name, ctor, *args, **kwargs):
    # Create or get layer by name to avoid mentioning it in the constructor.
    if not hasattr(self, '_modules'):
      self._modules = {}
    if name not in self._modules:
      self._modules[name] = ctor(*args, **kwargs)
    return self._modules[name]


def nest_summary(structure):
  if isinstance(structure, dict):
    return {k: nest_summary(v) for k, v in structure.items()}
  if isinstance(structure, list):
    return [nest_summary(v) for v in structure]
  if hasattr(structure, 'shape'):
    return str(structure.shape).replace(', ', 'x').strip('(), ')
  return '?'


def graph_summary(writer, fn, step, *args):
  def inner(*args):
    tf.summary.experimental.set_step(step)
    with writer.as_default():
      fn(*args)
  return tf.numpy_function(inner, args, [])


def video_summary(name, video, step=None, fps=20):
  if isinstance(name, type(np.zeros(1))):
    name = str(name)
  else:
    name = name if isinstance(name, str) else name.decode('utf-8')
  if np.issubdtype(video.dtype, np.floating):
    video = np.clip(255 * video, 0, 255).astype(np.uint8)
  B, T, H, W, C = video.shape
  try:
    frames = video.transpose((1, 2, 0, 3, 4)).reshape((T, H, B * W, C))
    summary = tf1.Summary()
    image = tf1.Summary.Image(height=B * H, width=T * W, colorspace=C)
    image.encoded_image_string = encode_gif(frames, fps)
    summary.value.add(tag=name + '/gif', image=image)
    tf.summary.experimental.write_raw_pb(summary.SerializeToString(), step)
  except (IOError, OSError) as e:
    print('GIF summaries require ffmpeg in $PATH.', e)
    frames = video.transpose((0, 2, 1, 3, 4)).reshape((1, B * H, T * W, C))
    tf.summary.image(name + '/grid', frames, step)


def encode_gif(frames, fps):
  from subprocess import Popen, PIPE
  h, w, c = frames[0].shape
  pxfmt = {1: 'gray', 3: 'rgb24'}[c]
  cmd = ' '.join([
      'ffmpeg -y -f rawvideo -vcodec rawvideo',
      f'-r {fps:.02f} -s {w}x{h} -pix_fmt {pxfmt} -i - -filter_complex',
      '[0:v]split[x][z];[z]palettegen[y];[x]fifo[x];[x][y]paletteuse',
      f'-r {fps:.02f} -f gif -'])
  proc = Popen(cmd.split(' '), stdin=PIPE, stdout=PIPE, stderr=PIPE)
  for image in frames:
    proc.stdin.write(image.tobytes())  # tostring() is deprecated in NumPy.
  out, err = proc.communicate()
  if proc.returncode:
    raise IOError('\n'.join([cmd, err.decode('utf8')]))
  del proc
  return out


def simulate(agent, envs, steps=0, episodes=0, state=None):
  # Initialize or unpack simulation state.
  if state is None:
    step, episode = 0, 0
    done = np.ones(len(envs), bool)
    length = np.zeros(len(envs), np.int32)
    obs = [None] * len(envs)
    agent_state = None
  else:
    step, episode, done, length, obs, agent_state = state
  while (steps and step < steps) or (episodes and episode < episodes):
    # Reset envs if necessary.
    if done.any():
      indices = [index for index, d in enumerate(done) if d]
      promises = [envs[i].reset(blocking=False) for i in indices]
      for index, promise in zip(indices, promises):
        obs[index] = promise()
    # Step agents.
    # If using augmentation, dreamer.policy (or this call site) must change.
    obs = {k: np.stack([o[k] for o in obs]) for k in obs[0]}
    action, agent_state = agent(obs, done, agent_state)
    action = np.array(action)
    assert len(action) == len(envs)
    # Step envs.
    promises = [e.step(a, blocking=False) for e, a in zip(envs, action)]
    obs, _, done = zip(*[p()[:3] for p in promises])
    obs = list(obs)
    done = np.stack(done)
    episode += int(done.sum())
    length += 1
    step += (done * length).sum()
    length *= (1 - done)
  # Return new state to allow resuming the simulation.
  return (step - steps, episode - episodes, done, length, obs, agent_state)
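The tuple returned above is designed to be passed back in as `state`, so collection can resume where it stopped; a sketch of a hypothetical driver loop:

# Collect experience in chunks of 1000 steps, carrying the counters,
# done flags, last observations, and recurrent agent state across calls.
state = None
for _ in range(5):
  state = simulate(agent, envs, steps=1000, state=state)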

def count_episodes(directory):
  filenames = directory.glob('*.npz')
  lengths = [int(n.stem.rsplit('-', 1)[-1]) - 1 for n in filenames]
  episodes, steps = len(lengths), sum(lengths)
  return episodes, steps


def save_episodes(directory, episodes):
  directory = pathlib.Path(directory).expanduser()
  directory.mkdir(parents=True, exist_ok=True)
  timestamp = datetime.datetime.now().strftime('%Y%m%dT%H%M%S')
  for episode in episodes:
    identifier = str(uuid.uuid4().hex)
    length = len(episode['reward'])
    filename = directory / f'{timestamp}-{identifier}-{length}.npz'
    with io.BytesIO() as f1:
      np.savez_compressed(f1, **episode)
      f1.seek(0)
      with filename.open('wb') as f2:
        f2.write(f1.read())


def load_episodes(directory, rescan, length=None, balance=False, seed=0):
  directory = pathlib.Path(directory).expanduser()
  random = np.random.RandomState(seed)
  cache = {}
  while True:
    for filename in directory.glob('*.npz'):
      if filename not in cache:
        try:
          with filename.open('rb') as f:
            episode = np.load(f)
            episode = {k: episode[k] for k in episode.keys()}
        except Exception as e:
          print(f'Could not load episode: {e}')
          continue
        cache[filename] = episode
    keys = list(cache.keys())
    for index in random.choice(len(keys), rescan):
      episode = cache[keys[index]]
      if length:
        total = len(next(iter(episode.values())))
        available = total - length
        if available < 1:
          print(f'Skipped short episode of length {available} ({total}/{length}).')
          continue
        if balance:
          index = min(random.randint(0, total), available)
        else:
          index = int(random.randint(0, available))
        episode = {k: v[index: index + length]
                   for k, v in episode.items()}
      yield episode


class DummyEnv:

  def __init__(self):
    self._random = np.random.RandomState(seed=0)
    self._step = None

  @property
  def observation_space(self):
    low = np.zeros([64, 64, 3], dtype=np.uint8)
    high = 255 * np.ones([64, 64, 3], dtype=np.uint8)
    spaces = {'image': gym.spaces.Box(low, high)}
    return gym.spaces.Dict(spaces)

  @property
  def action_space(self):
    low = -np.ones([5], dtype=np.float32)
    high = np.ones([5], dtype=np.float32)
    return gym.spaces.Box(low, high)

  def reset(self):
    self._step = 0
    obs = self.observation_space.sample()
    return obs

  def step(self, action):
    obs = self.observation_space.sample()
    reward = self._random.uniform(0, 1)
    self._step += 1
    done = self._step >= 1000
    info = {}
    return obs, reward, done, info


class SampleDist:

  def __init__(self, dist, samples=100):
    self._dist = dist
    self._samples = samples

  @property
  def name(self):
    return 'SampleDist'

  def __getattr__(self, name):
    return getattr(self._dist, name)

  def mean(self):
    samples = self._dist.sample(self._samples)
    return tf.reduce_mean(samples, 0)

  def mode(self):
    sample = self._dist.sample(self._samples)
    logprob = self._dist.log_prob(sample)
    return tf.gather(sample, tf.argmax(logprob))[0]  # pylint: disable=E

  def entropy(self):
    sample = self._dist.sample(self._samples)
    logprob = self.log_prob(sample)
    return -tf.reduce_mean(logprob, 0)


class OneHotDist:

  def __init__(self, logits=None, probs=None):
    self._dist = tfd.Categorical(logits=logits, probs=probs)
    self._num_classes = self.mean().shape[-1]
    self._dtype = prec.global_policy().compute_dtype

  @property
  def name(self):
    return 'OneHotDist'

  def __getattr__(self, name):
    return getattr(self._dist, name)

  def prob(self, events):
    indices = tf.argmax(events, axis=-1)
    return self._dist.prob(indices)

  def log_prob(self, events):
    indices = tf.argmax(events, axis=-1)
    return self._dist.log_prob(indices)

  def mean(self):
    return self._dist.probs_parameter()

  def mode(self):
    return self._one_hot(self._dist.mode())

  def sample(self, amount=None):
    amount = [amount] if amount else []
    indices = self._dist.sample(*amount)
    sample = self._one_hot(indices)
    probs = self._dist.probs_parameter()
    sample += tf.cast(probs - tf.stop_gradient(probs), self._dtype)
    return sample

  def _one_hot(self, indices):
    return tf.one_hot(indices, self._num_classes, dtype=self._dtype)  # pylint: disable=E


class TanhBijector(tfp.bijectors.Bijector):

  def __init__(self, validate_args=False, name='tanh'):
    super().__init__(
        forward_min_event_ndims=0,
        validate_args=validate_args,
        name=name)

  def _forward(self, x):
    return tf.nn.tanh(x)

  def _inverse(self, y):
    dtype = y.dtype
    y = tf.cast(y, tf.float32)
    y = tf.where(
        tf.less_equal(tf.abs(y), 1.),
        tf.clip_by_value(y, -0.99999997, 0.99999997), y)
    y = tf.atanh(y)
    y = tf.cast(y, dtype)
    return y

  def _forward_log_det_jacobian(self, x):
    log2 = tf.math.log(tf.constant(2.0, dtype=x.dtype))
    return 2.0 * (log2 - x - tf.nn.softplus(-2.0 * x))


def lambda_return(
    reward, value, pcont, bootstrap, lambda_, axis):
  # Setting lambda=1 gives a discounted Monte Carlo return.
  # Setting lambda=0 gives a fixed 1-step return.
  assert reward.shape.ndims == value.shape.ndims, (reward.shape, value.shape)
  if isinstance(pcont, (int, float)):
    pcont = pcont * tf.ones_like(reward)
  dims = list(range(reward.shape.ndims))
  dims = [axis] + dims[1:axis] + [0] + dims[axis + 1:]
  if axis != 0:
    reward = tf.transpose(reward, dims)
    value = tf.transpose(value, dims)
    pcont = tf.transpose(pcont, dims)
  if bootstrap is None:
    bootstrap = tf.zeros_like(value[-1])
  next_values = tf.concat([value[1:], bootstrap[None]], 0)
  inputs = reward + pcont * next_values * (1 - lambda_)
  returns = static_scan(
      lambda agg, cur: cur[0] + cur[1] * lambda_ * agg,
      (inputs, pcont), bootstrap, reverse=True)
  if axis != 0:
    returns = tf.transpose(returns, dims)
  return returns
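A small sanity check of the lambda=0 limit (values invented for the example):

# With lambda_=0 the recursion collapses to the one-step target
# r_t + pcont * V(s_{t+1}); time is the leading axis here.
reward = tf.constant([[1.0], [1.0]])   # [time=2, batch=1]
value = tf.constant([[0.5], [0.5]])
ret = lambda_return(reward, value, pcont=0.99,
                    bootstrap=value[-1], lambda_=0.0, axis=0)
# ret[0] == 1.0 + 0.99 * 0.5 == 1.495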
|
||||||
|
|
||||||
|
|
||||||
|
class Adam(tf.Module):
|
||||||
|
|
||||||
|
def __init__(self, name, modules, lr, clip=None, wd=None, wdpattern=r'.*'):
|
||||||
|
self._name = name
|
||||||
|
self._modules = modules
|
||||||
|
self._clip = clip
|
||||||
|
self._wd = wd
|
||||||
|
self._wdpattern = wdpattern
|
||||||
|
self._opt = tf.optimizers.Adam(lr)
|
||||||
|
self._opt = prec.LossScaleOptimizer(self._opt, 'dynamic')
|
||||||
|
self._variables = None
|
||||||
|
|
||||||
|
@property
|
||||||
|
def variables(self):
|
||||||
|
return self._opt.variables()
|
||||||
|
|
||||||
|
def __call__(self, tape, loss):
|
||||||
|
if self._variables is None:
|
||||||
|
variables = [module.variables for module in self._modules]
|
||||||
|
self._variables = tf.nest.flatten(variables)
|
||||||
|
count = sum(np.prod(x.shape) for x in self._variables)
|
||||||
|
print(f'Found {count} {self._name} parameters.')
|
||||||
|
assert len(loss.shape) == 0, loss.shape
|
||||||
|
with tape:
|
||||||
|
loss = self._opt.get_scaled_loss(loss)
|
||||||
|
grads = tape.gradient(loss, self._variables)
|
||||||
|
grads = self._opt.get_unscaled_gradients(grads)
|
||||||
|
norm = tf.linalg.global_norm(grads)
|
||||||
|
if self._clip:
|
||||||
|
grads, _ = tf.clip_by_global_norm(grads, self._clip, norm)
|
||||||
|
if self._wd:
|
||||||
|
context = tf.distribute.get_replica_context()
|
||||||
|
context.merge_call(self._apply_weight_decay)
|
||||||
|
self._opt.apply_gradients(zip(grads, self._variables))
|
||||||
|
return norm
|
||||||
|
|
||||||
|
def _apply_weight_decay(self, strategy):
|
||||||
|
print('Applied weight decay to variables:')
|
||||||
|
for var in self._variables:
|
||||||
|
if re.search(self._wdpattern, self._name + '/' + var.name):
|
||||||
|
print('- ' + self._name + '/' + var.name)
|
||||||
|
strategy.extended.update(var, lambda var: self._wd * var)
|
||||||
|

def args_type(default):
  if isinstance(default, bool):
    return lambda x: bool(['False', 'True'].index(x))
  if isinstance(default, int):
    return lambda x: float(x) if ('e' in x or '.' in x) else int(x)
  if isinstance(default, pathlib.Path):
    return lambda x: pathlib.Path(x).expanduser()
  return type(default)
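A quick sketch of the resulting parsers (the `'1e5'` case is why int defaults also accept float-looking strings):

parse = args_type(True)
print(parse('False'))  # False ('True'/'False' arrive as strings from argparse)
parse = args_type(100)
print(parse('1e5'))    # 100000.0, parsed as float because of the 'e'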

def static_scan(fn, inputs, start, reverse=False):
  last = start
  outputs = [[] for _ in tf.nest.flatten(start)]
  indices = range(len(tf.nest.flatten(inputs)[0]))
  if reverse:
    indices = reversed(indices)
  for index in indices:
    inp = tf.nest.map_structure(lambda x: x[index], inputs)
    last = fn(last, inp)
    for o, l in zip(outputs, tf.nest.flatten(last)):
      o.append(l)
  if reverse:
    outputs = [list(reversed(x)) for x in outputs]
  outputs = [tf.stack(x, 0) for x in outputs]
  return tf.nest.pack_sequence_as(start, outputs)
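static_scan unrolls the loop in Python rather than calling tf.scan, which keeps shapes static under tf.function; a cumulative-sum sketch:

xs = tf.constant([1.0, 2.0, 3.0, 4.0])
cumsum = static_scan(lambda agg, x: agg + x, xs, tf.constant(0.0))
print(cumsum.numpy())  # [ 1.  3.  6. 10.]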

def _mnd_sample(self, sample_shape=(), seed=None, name='sample'):
  return tf.random.normal(
      tuple(sample_shape) + tuple(self.event_shape),
      self.mean(), self.stddev(), self.dtype, seed, name)


tfd.MultivariateNormalDiag.sample = _mnd_sample


def _cat_sample(self, sample_shape=(), seed=None, name='sample'):
  assert len(sample_shape) in (0, 1), sample_shape
  assert len(self.logits_parameter().shape) == 2
  indices = tf.random.categorical(
      self.logits_parameter(), sample_shape[0] if sample_shape else 1,
      self.dtype, seed, name)
  if not sample_shape:
    indices = indices[..., 0]
  return indices


tfd.Categorical.sample = _cat_sample

class Every:

  def __init__(self, every):
    self._every = every
    self._last = None

  def __call__(self, step):
    if self._last is None:
      self._last = step
      return True
    if step >= self._last + self._every:
      self._last += self._every
      return True
    return False


class Once:

  def __init__(self):
    self._once = True

  def __call__(self):
    if self._once:
      self._once = False
      return True
    return False
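A sketch of how these two schedulers behave in a training loop:

should_log = Every(1000)
should_pretrain = Once()
for step in range(0, 3000, 500):
  if should_log(step):
    print('log at', step)  # fires at 0, 1000, 2000
  if should_pretrain():
    print('pretrain')      # fires only on the first iteration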
73
Dreamer/train_configs/dreamer.yaml
Normal file
@ -0,0 +1,73 @@
dmc:

  logdir: ./
  video_dir: ./
  debug: False
  seed: 0
  steps: 1000000.0
  snapshot_every: 20000.0
  eval_every: 5000.0
  log_every: 5000.0
  image_size: 64
  log_scalars: true
  log_images: true
  gpu_growth: true
  precision: 16
  task: dmc_cheetah_run_driving
  envs: 1
  parallel: none
  action_repeat: 2
  time_limit: 1000
  prefill: 5000
  eval_noise: 0.0
  clip_rewards: none
  deter_size: 283
  stoch_size: 42
  num_units: 400
  dense_act: elu
  cnn_act: relu
  cnn_depth: 45
  pcont: false
  free_nats: 3.0
  kl_scale: 1.0
  pcont_scale: 10.0
  weight_decay: 0.0
  weight_decay_pattern: .*
  batch_size: 50
  batch_length: 50
  train_every: 1000
  train_steps: 100
  pretrain: 100
  model_lr: 0.0006
  value_lr: 8.0e-05
  actor_lr: 8.0e-05
  grad_clip: 100.0
  dataset_balance: false
  discount: 0.99
  disclam: 0.95
  horizon: 15
  action_dist: tanh_normal
  action_init_std: 5.0
  expl: additive_gaussian
  expl_amount: 0.3
  expl_decay: 0.0
  expl_min: 0.0

debug:

  debug: True
  pretrain: 1
  prefill: 1
  train_steps: 1
  deter_size: 20
  stoch_size: 3
  num_units: 40
  cnn_depth: 3
  batch_size: 10
  batch_length: 20
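The `debug` section presumably overlays the base `dmc` section at load time, so it only lists overrides; a minimal sketch of that merge:

import pathlib
import ruamel.yaml as yaml

configs = yaml.safe_load(
    pathlib.Path('Dreamer/train_configs/dreamer.yaml').read_text())
params = dict(configs['dmc'])
params.update(configs['debug'])  # shrink the model for a smoke test
print(params['deter_size'])      # 20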
66
Dreamer/train_configs/inverse.yaml
Normal file
@ -0,0 +1,66 @@
dmc:

  logdir: ./
  video_dir: ./
  debug: False
  seed: 0
  steps: 1000000.0
  snapshot_every: 20000.0
  eval_every: 5000.0
  log_every: 5000.0
  image_size: 64
  log_scalars: true
  log_images: false
  gpu_growth: true
  precision: 16
  task: dmc_cheetah_run_driving
  envs: 1
  parallel: none
  action_repeat: 2
  time_limit: 1000
  prefill: 5000
  eval_noise: 0.0
  clip_rewards: none
  deter_size: 283
  stoch_size: 42
  num_units: 400
  dense_act: elu
  cnn_act: relu
  cnn_depth: 45
  pcont: false
  free_nats: 3.0
  kl_scale: 1.0
  pcont_scale: 10.0
  weight_decay: 0.0
  weight_decay_pattern: .*
  batch_size: 50
  batch_length: 50
  train_every: 1000
  train_steps: 100
  pretrain: 100
  model_lr: 0.0006
  value_lr: 8.0e-05
  actor_lr: 8.0e-05
  grad_clip: 100.0
  dataset_balance: false
  discount: 0.99
  disclam: 0.95
  horizon: 15
  action_dist: tanh_normal
  action_init_std: 5.0
  expl: additive_gaussian
  expl_amount: 0.3
  expl_decay: 0.0
  expl_min: 0.0

debug:

  debug: True
  pretrain: 1
  prefill: 1
  train_steps: 1
  batch_size: 10
  batch_length: 20
75
Dreamer/train_configs/tia.yaml
Normal file
@ -0,0 +1,75 @@
dmc:

  logdir: ./
  video_dir: ./
  debug: False
  seed: 0
  steps: 1000000.0
  snapshot_every: 20000.0
  eval_every: 5000.0
  log_every: 5000.0
  image_size: 64
  log_scalars: true
  log_images: true
  gpu_growth: true
  precision: 16
  task: dmc_cheetah_run_driving
  envs: 1
  parallel: none
  action_repeat: 2
  time_limit: 1000
  prefill: 5000
  eval_noise: 0.0
  clip_rewards: none
  deter_size: 200
  stoch_size: 30
  disen_deter_size: 200
  disen_stoch_size: 30
  num_units: 400
  dense_act: elu
  cnn_act: relu
  cnn_depth: 26
  disen_cnn_depth: 26
  pcont: false
  free_nats: 3.0
  num_reward_opt_iters: 20
  disen_neg_rew_scale: 20000.0
  disen_rec_scale: 1.5
  disen_kl_scale: 1.0
  kl_scale: 1.0
  reward_scale: 1.0
  pcont_scale: 10.0
  weight_decay: 0.0
  weight_decay_pattern: .*
  batch_size: 50
  batch_length: 50
  train_every: 1000
  train_steps: 100
  pretrain: 100
  disen_reward_lr: 0.0006
  model_lr: 0.0006
  value_lr: 8.0e-05
  actor_lr: 8.0e-05
  grad_clip: 100.0
  dataset_balance: false
  discount: 0.99
  disclam: 0.95
  horizon: 15
  action_dist: tanh_normal
  action_init_std: 5.0
  expl: additive_gaussian
  expl_amount: 0.3
  expl_decay: 0.0
  expl_min: 0.0

debug:

  debug: True
  pretrain: 1
  prefill: 1
  train_steps: 1
  batch_size: 10
  batch_length: 20
540
Dreamer/wrappers.py
Normal file
@ -0,0 +1,540 @@
import atexit
import functools
import sys
import threading
import traceback

import gym
import numpy as np
from PIL import Image
from collections import deque


class DMC2GYMWrapper:

  def __init__(self, env):
    self._env = env

  def __getattr__(self, name):
    return getattr(self._env, name)

  @property
  def observation_space(self):
    spaces = {}
    spaces['image'] = gym.spaces.Box(
        0, 255, (self._env._height, self._env._width, 3,), dtype=np.uint8)
    return gym.spaces.Dict(spaces)

  def step(self, action):
    image, reward, done, info = self._env.step(action)
    obs = {'image': image}
    return obs, reward, done, info

  def reset(self):
    image = self._env.reset()
    obs = {'image': image}
    return obs

class DeepMindControl:

  def __init__(self, name, size=(64, 64), camera=None):
    domain, task = name.split('_', 1)
    if domain == 'cup':  # Only domain with multiple words.
      domain = 'ball_in_cup'
    if isinstance(domain, str):
      from dm_control import suite
      self._env = suite.load(domain, task)
    else:
      assert task is None
      self._env = domain()
    self._size = size
    if camera is None:
      camera = dict(quadruped=2).get(domain, 0)
    self._camera = camera

  @property
  def observation_space(self):
    spaces = {}
    for key, value in self._env.observation_spec().items():
      spaces[key] = gym.spaces.Box(
          -np.inf, np.inf, value.shape, dtype=np.float32)
    spaces['image'] = gym.spaces.Box(
        0, 255, self._size + (3,), dtype=np.uint8)
    return gym.spaces.Dict(spaces)

  @property
  def action_space(self):
    spec = self._env.action_spec()
    return gym.spaces.Box(spec.minimum, spec.maximum, dtype=np.float32)

  def step(self, action):
    time_step = self._env.step(action)
    obs = dict(time_step.observation)
    obs['image'] = self.render()
    reward = time_step.reward or 0
    done = time_step.last()
    info = {'discount': np.array(time_step.discount, np.float32)}
    return obs, reward, done, info

  def reset(self):
    time_step = self._env.reset()
    obs = dict(time_step.observation)
    obs['image'] = self.render()
    return obs

  def render(self, *args, **kwargs):
    if kwargs.get('mode', 'rgb_array') != 'rgb_array':
      raise ValueError("Only render mode 'rgb_array' is supported.")
    return self._env.physics.render(*self._size, camera_id=self._camera)

class Atari:

  LOCK = threading.Lock()

  def __init__(
      self, name, action_repeat=4, size=(84, 84), grayscale=True, noops=30,
      life_done=False, sticky_actions=True):
    import gym
    version = 0 if sticky_actions else 4
    name = ''.join(word.title() for word in name.split('_'))
    with self.LOCK:
      self._env = gym.make('{}NoFrameskip-v{}'.format(name, version))
    self._action_repeat = action_repeat
    self._size = size
    self._grayscale = grayscale
    self._noops = noops
    self._life_done = life_done
    self._lives = None
    shape = self._env.observation_space.shape[:2] + \
        (() if grayscale else (3,))
    self._buffers = [np.empty(shape, dtype=np.uint8) for _ in range(2)]
    self._random = np.random.RandomState(seed=None)

  @property
  def observation_space(self):
    shape = self._size + (1 if self._grayscale else 3,)
    space = gym.spaces.Box(low=0, high=255, shape=shape, dtype=np.uint8)
    return gym.spaces.Dict({'image': space})

  @property
  def action_space(self):
    return self._env.action_space

  def close(self):
    return self._env.close()

  def reset(self):
    with self.LOCK:
      self._env.reset()
    noops = self._random.randint(1, self._noops + 1)
    for _ in range(noops):
      done = self._env.step(0)[2]
      if done:
        with self.LOCK:
          self._env.reset()
    self._lives = self._env.ale.lives()
    if self._grayscale:
      self._env.ale.getScreenGrayscale(self._buffers[0])
    else:
      self._env.ale.getScreenRGB2(self._buffers[0])
    self._buffers[1].fill(0)
    return self._get_obs()

  def step(self, action):
    total_reward = 0.0
    for step in range(self._action_repeat):
      _, reward, done, info = self._env.step(action)
      total_reward += reward
      if self._life_done:
        lives = self._env.ale.lives()
        done = done or lives < self._lives
        self._lives = lives
      if done:
        break
      elif step >= self._action_repeat - 2:
        index = step - (self._action_repeat - 2)
        if self._grayscale:
          self._env.ale.getScreenGrayscale(self._buffers[index])
        else:
          self._env.ale.getScreenRGB2(self._buffers[index])
    obs = self._get_obs()
    return obs, total_reward, done, info

  def render(self, mode):
    return self._env.render(mode)

  def _get_obs(self):
    if self._action_repeat > 1:
      np.maximum(self._buffers[0],
                 self._buffers[1], out=self._buffers[0])
    image = np.array(Image.fromarray(self._buffers[0]).resize(
        self._size, Image.BILINEAR))
    image = np.clip(image, 0, 255).astype(np.uint8)
    image = image[:, :, None] if self._grayscale else image
    return {'image': image}

class Collect:

  def __init__(self, env, callbacks=None, precision=32):
    self._env = env
    self._callbacks = callbacks or ()
    self._precision = precision
    self._episode = None

  def __getattr__(self, name):
    return getattr(self._env, name)

  def step(self, action):
    obs, reward, done, info = self._env.step(action)
    obs = {k: self._convert(v) for k, v in obs.items()}
    transition = obs.copy()
    transition['action'] = action
    transition['reward'] = reward
    transition['discount'] = info.get(
        'discount', np.array(1 - float(done)))
    self._episode.append(transition)
    if done:
      episode = {k: [t[k] for t in self._episode]
                 for k in self._episode[0]}
      episode = {k: self._convert(v) for k, v in episode.items()}
      info['episode'] = episode
      for callback in self._callbacks:
        callback(episode)
    return obs, reward, done, info

  def reset(self):
    obs = self._env.reset()
    transition = obs.copy()
    transition['action'] = np.zeros(self._env.action_space.shape)
    transition['reward'] = 0.0
    transition['discount'] = 1.0
    self._episode = [transition]
    return obs

  def _convert(self, value):
    value = np.array(value)
    if np.issubdtype(value.dtype, np.floating):
      dtype = {16: np.float16, 32: np.float32,
               64: np.float64}[self._precision]
    elif np.issubdtype(value.dtype, np.signedinteger):
      dtype = {16: np.int16, 32: np.int32, 64: np.int64}[self._precision]
    elif np.issubdtype(value.dtype, np.uint8):
      dtype = np.uint8
    else:
      raise NotImplementedError(value.dtype)
    return value.astype(dtype)

class TimeLimit:

  def __init__(self, env, duration):
    self._env = env
    self._duration = duration
    self._step = None

  def __getattr__(self, name):
    return getattr(self._env, name)

  def step(self, action):
    assert self._step is not None, 'Must reset environment.'
    obs, reward, done, info = self._env.step(action)
    self._step += 1
    if self._step >= self._duration:
      done = True
      if 'discount' not in info:
        info['discount'] = np.array(1.0).astype(np.float32)
      self._step = None
    return obs, reward, done, info

  def reset(self):
    self._step = 0
    return self._env.reset()

class ActionRepeat:

  def __init__(self, env, amount):
    self._env = env
    self._amount = amount

  def __getattr__(self, name):
    return getattr(self._env, name)

  def step(self, action):
    done = False
    total_reward = 0
    current_step = 0
    while current_step < self._amount and not done:
      obs, reward, done, info = self._env.step(action)
      total_reward += reward
      current_step += 1
    return obs, total_reward, done, info

class NormalizeActions:

  def __init__(self, env):
    self._env = env
    self._mask = np.logical_and(
        np.isfinite(env.action_space.low),
        np.isfinite(env.action_space.high))
    self._low = np.where(self._mask, env.action_space.low, -1)
    self._high = np.where(self._mask, env.action_space.high, 1)

  def __getattr__(self, name):
    return getattr(self._env, name)

  @property
  def action_space(self):
    low = np.where(self._mask, -np.ones_like(self._low), self._low)
    high = np.where(self._mask, np.ones_like(self._low), self._high)
    return gym.spaces.Box(low, high, dtype=np.float32)

  def step(self, action):
    original = (action + 1) / 2 * (self._high - self._low) + self._low
    original = np.where(self._mask, original, action)
    return self._env.step(original)

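The step above affinely maps agent actions from [-1, 1] back to the environment bounds (only where the bounds are finite); a worked check with toy bounds:

low, high = np.array([0.0]), np.array([4.0])
action = np.array([0.5])  # agent output in [-1, 1]
print((action + 1) / 2 * (high - low) + low)  # [3.], i.e. 75% of the range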
class ObsDict:

  def __init__(self, env, key='obs'):
    self._env = env
    self._key = key

  def __getattr__(self, name):
    return getattr(self._env, name)

  @property
  def observation_space(self):
    spaces = {self._key: self._env.observation_space}
    return gym.spaces.Dict(spaces)

  @property
  def action_space(self):
    return self._env.action_space

  def step(self, action):
    obs, reward, done, info = self._env.step(action)
    obs = {self._key: np.array(obs)}
    return obs, reward, done, info

  def reset(self):
    obs = self._env.reset()
    obs = {self._key: np.array(obs)}
    return obs

class OneHotAction:

  def __init__(self, env):
    assert isinstance(env.action_space, gym.spaces.Discrete)
    self._env = env

  def __getattr__(self, name):
    return getattr(self._env, name)

  @property
  def action_space(self):
    shape = (self._env.action_space.n,)
    space = gym.spaces.Box(low=0, high=1, shape=shape, dtype=np.float32)
    space.sample = self._sample_action
    return space

  def step(self, action):
    index = np.argmax(action).astype(int)
    reference = np.zeros_like(action)
    reference[index] = 1
    if not np.allclose(reference, action):
      raise ValueError(f'Invalid one-hot action:\n{action}')
    return self._env.step(index)

  def reset(self):
    return self._env.reset()

  def _sample_action(self):
    actions = self._env.action_space.n
    index = self._random.randint(0, actions)
    reference = np.zeros(actions, dtype=np.float32)
    reference[index] = 1.0
    return reference

class RewardObs:

  def __init__(self, env):
    self._env = env

  def __getattr__(self, name):
    return getattr(self._env, name)

  @property
  def observation_space(self):
    spaces = self._env.observation_space.spaces
    assert 'reward' not in spaces
    spaces['reward'] = gym.spaces.Box(-np.inf, np.inf, dtype=np.float32)
    return gym.spaces.Dict(spaces)

  def step(self, action):
    obs, reward, done, info = self._env.step(action)
    obs['reward'] = reward
    return obs, reward, done, info

  def reset(self):
    obs = self._env.reset()
    obs['reward'] = 0.0
    return obs

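These wrappers are written to nest; a typical composition for a DMC task (mirroring how the training script presumably builds its environments):

env = DeepMindControl('cheetah_run')
env = ActionRepeat(env, 2)
env = NormalizeActions(env)
env = TimeLimit(env, 1000 // 2)
env = Collect(env, callbacks=[print])  # print each finished episode dict
env = RewardObs(env)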
class Async:

  _ACCESS = 1
  _CALL = 2
  _RESULT = 3
  _EXCEPTION = 4
  _CLOSE = 5

  def __init__(self, ctor, strategy='process'):
    self._strategy = strategy
    if strategy == 'none':
      self._env = ctor()
    elif strategy == 'thread':
      import multiprocessing.dummy as mp
    elif strategy == 'process':
      import multiprocessing as mp
    else:
      raise NotImplementedError(strategy)
    if strategy != 'none':
      self._conn, conn = mp.Pipe()
      self._process = mp.Process(target=self._worker, args=(ctor, conn))
      atexit.register(self.close)
      self._process.start()
    self._obs_space = None
    self._action_space = None

  @property
  def observation_space(self):
    if not self._obs_space:
      self._obs_space = self.__getattr__('observation_space')
    return self._obs_space

  @property
  def action_space(self):
    if not self._action_space:
      self._action_space = self.__getattr__('action_space')
    return self._action_space

  def __getattr__(self, name):
    if self._strategy == 'none':
      return getattr(self._env, name)
    self._conn.send((self._ACCESS, name))
    return self._receive()

  def call(self, name, *args, **kwargs):
    blocking = kwargs.pop('blocking', True)
    if self._strategy == 'none':
      return functools.partial(getattr(self._env, name), *args, **kwargs)
    payload = name, args, kwargs
    self._conn.send((self._CALL, payload))
    promise = self._receive
    return promise() if blocking else promise

  def close(self):
    if self._strategy == 'none':
      try:
        self._env.close()
      except AttributeError:
        pass
      return
    try:
      self._conn.send((self._CLOSE, None))
      self._conn.close()
    except IOError:
      # The connection was already closed.
      pass
    self._process.join()

  def step(self, action, blocking=True):
    return self.call('step', action, blocking=blocking)

  def reset(self, blocking=True):
    return self.call('reset', blocking=blocking)

  def _receive(self):
    try:
      message, payload = self._conn.recv()
    except ConnectionResetError:
      raise RuntimeError('Environment worker crashed.')
    # Re-raise exceptions in the main process.
    if message == self._EXCEPTION:
      stacktrace = payload
      raise Exception(stacktrace)
    if message == self._RESULT:
      return payload
    raise KeyError(f'Received message of unexpected type {message}')

  def _worker(self, ctor, conn):
    try:
      env = ctor()
      while True:
        try:
          # Only block for short times to have keyboard exceptions be raised.
          if not conn.poll(0.1):
            continue
          message, payload = conn.recv()
        except (EOFError, KeyboardInterrupt):
          break
        if message == self._ACCESS:
          name = payload
          result = getattr(env, name)
          conn.send((self._RESULT, result))
          continue
        if message == self._CALL:
          name, args, kwargs = payload
          result = getattr(env, name)(*args, **kwargs)
          conn.send((self._RESULT, result))
          continue
        if message == self._CLOSE:
          assert payload is None
          break
        raise KeyError(f'Received message of unknown type {message}')
    except Exception:
      stacktrace = ''.join(traceback.format_exception(*sys.exc_info()))
      print(f'Error in environment process: {stacktrace}')
      conn.send((self._EXCEPTION, stacktrace))
    conn.close()

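A sketch of the promise-style stepping Async enables: send all steps non-blocking, then gather the results (spawn-based platforms need a picklable ctor; `actions` is assumed):

envs = [Async(lambda: DeepMindControl('cheetah_run')) for _ in range(4)]
promises = [env.step(a, blocking=False) for env, a in zip(envs, actions)]
results = [promise() for promise in promises]  # transitions, in parallel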
class FrameStack(gym.Wrapper):

  def __init__(self, env, k):
    gym.Wrapper.__init__(self, env)
    self._k = k
    self._frames = deque([], maxlen=k)
    shp = env.observation_space.shape
    self.observation_space = gym.spaces.Box(
        low=0,
        high=1,
        shape=((shp[0] * k,) + shp[1:]),
        dtype=env.observation_space.dtype)
    self._max_episode_steps = env._max_episode_steps

  def reset(self):
    obs = self.env.reset()
    for _ in range(self._k):
      self._frames.append(obs)
    return self._get_obs()

  def step(self, action):
    obs, reward, done, info = self.env.step(action)
    self._frames.append(obs)
    return self._get_obs(), reward, done, info

  def _get_obs(self):
    assert len(self._frames) == self._k
    return np.concatenate(list(self._frames), axis=0)
185
DreamerV2/configs.yaml
Normal file
@ -0,0 +1,185 @@
defaults:

  gpu: 'none'
  logdir: ./
  traindir: null
  evaldir: null
  offline_traindir: ''
  offline_evaldir: ''
  seed: 0
  steps: 1e7
  eval_every: 1e4
  log_every: 1e4
  reset_every: 0
  gpu_growth: True
  precision: 32
  debug: False
  expl_gifs: False

  # Environment
  task: 'dmc_walker_walk'
  size: [64, 64]
  envs: 1
  action_repeat: 2
  time_limit: 1000
  prefill: 2500
  eval_noise: 0.0
  clip_rewards: 'identity'
  atari_grayscale: False

  # Model
  dyn_cell: 'gru'
  dyn_hidden: 200
  dyn_deter: 200
  dyn_stoch: 50
  dyn_discrete: 0
  dyn_input_layers: 1
  dyn_output_layers: 1
  dyn_shared: False
  dyn_mean_act: 'none'
  dyn_std_act: 'sigmoid2'
  dyn_min_std: 0.1
  grad_heads: ['image', 'reward']
  units: 400
  reward_layers: 2
  discount_layers: 3
  value_layers: 3
  actor_layers: 4
  act: 'elu'
  cnn_depth: 32
  encoder_kernels: [4, 4, 4, 4]
  decoder_kernels: [5, 5, 6, 6]
  decoder_thin: True
  value_head: 'normal'
  kl_scale: '1.0'
  kl_balance: '0.8'
  kl_free: '1.0'
  pred_discount: False
  discount_scale: 1.0
  reward_scale: 1.0
  weight_decay: 0.0

  # Training
  batch_size: 50
  batch_length: 50
  train_every: 5
  train_steps: 1
  pretrain: 100
  model_lr: 3e-4
  value_lr: 8e-5
  actor_lr: 8e-5
  opt_eps: 1e-5
  grad_clip: 100
  value_grad_clip: 100
  actor_grad_clip: 100
  dataset_size: 0
  oversample_ends: False
  slow_value_target: True
  slow_actor_target: True
  slow_target_update: 100
  slow_target_fraction: 1
  opt: 'adam'

  # Behavior.
  discount: 0.99
  discount_lambda: 0.95
  imag_horizon: 15
  imag_gradient: 'dynamics'
  imag_gradient_mix: '0.1'
  imag_sample: True
  actor_dist: 'trunc_normal'
  actor_entropy: '1e-4'
  actor_state_entropy: 0.0
  actor_init_std: 1.0
  actor_min_std: 0.1
  actor_disc: 5
  actor_temp: 0.1
  actor_outscale: 0.0
  expl_amount: 0.0
  eval_state_mean: False
  collect_dyn_sample: True
  behavior_stop_grad: True
  value_decay: 0.0
  future_entropy: False

  # Exploration
  expl_behavior: 'greedy'
  expl_until: 0
  expl_extr_scale: 0.0
  expl_intr_scale: 1.0
  disag_target: 'stoch'
  disag_log: True
  disag_models: 10
  disag_offset: 1
  disag_layers: 4
  disag_units: 400

atari:

  # General
  task: 'atari_demon_attack'
  steps: 3e7
  eval_every: 1e5
  log_every: 1e4
  prefill: 50000
  dataset_size: 2e6
  pretrain: 0
  precision: 16

  # Environment
  time_limit: 108000  # 30 minutes of game play.
  atari_grayscale: True
  action_repeat: 4
  eval_noise: 0.001
  train_every: 16
  train_steps: 1
  clip_rewards: 'tanh'

  # Model
  grad_heads: ['image', 'reward', 'discount']
  dyn_cell: 'gru_layer_norm'
  pred_discount: True
  cnn_depth: 48
  dyn_deter: 600
  dyn_hidden: 600
  dyn_stoch: 32
  dyn_discrete: 32
  reward_layers: 4
  discount_layers: 4
  value_layers: 4
  actor_layers: 4

  # Behavior
  actor_dist: 'onehot'
  actor_entropy: 'linear(3e-3,3e-4,2.5e6)'
  expl_amount: 0.0
  expl_until: 3e7
  discount: 0.995
  imag_gradient: 'both'
  imag_gradient_mix: 'linear(0.1,0,2.5e6)'

  # Training
  discount_scale: 5.0
  reward_scale: 1
  weight_decay: 1e-6
  model_lr: 2e-4
  kl_scale: 0.1
  kl_free: 0.0
  actor_lr: 4e-5
  value_lr: 1e-4
  oversample_ends: True

  # Disen
  disen_cnn_depth: 16
  disen_only_scale: 1.0
  disen_discount_scale: 2000.0
  disen_reward_scale: 2000.0
  num_reward_opt_iters: 20

debug:

  debug: True
  pretrain: 1
  prefill: 1
  train_steps: 1
  batch_size: 10
  batch_length: 20
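Several entries (e.g. `actor_entropy`, `imag_gradient_mix`) are strings because they can encode schedules like `linear(3e-3,3e-4,2.5e6)`; the real parser lives in `tools.schedule` (not shown in this commit excerpt), but a minimal sketch of how such a string could be interpreted:

import re

def linear_schedule(spec, step):
  # 'linear(initial,final,duration)': interpolate over `duration` steps.
  initial, final, duration = map(
      float, re.findall(r'linear\((.+),(.+),(.+)\)', spec)[0])
  mix = min(max(step / duration, 0.0), 1.0)
  return (1 - mix) * initial + mix * final

print(linear_schedule('linear(3e-3,3e-4,2.5e6)', 0))      # 0.003
print(linear_schedule('linear(3e-3,3e-4,2.5e6)', 2.5e6))  # 0.0003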
316
DreamerV2/dreamer.py
Normal file
@ -0,0 +1,316 @@
import argparse
import collections
import functools
import os
import pathlib
import sys
import warnings

warnings.filterwarnings('ignore', '.*box bound precision lowered.*')
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['MUJOCO_GL'] = 'egl'

import numpy as np
import ruamel.yaml as yaml
import tensorflow as tf
from tensorflow.keras.mixed_precision import experimental as prec

tf.get_logger().setLevel('ERROR')

from tensorflow_probability import distributions as tfd

sys.path.append(str(pathlib.Path(__file__).parent))

import exploration as expl
import models
import tools
import wrappers

class Dreamer(tools.Module):

  def __init__(self, config, logger, dataset):
    self._config = config
    self._logger = logger
    self._float = prec.global_policy().compute_dtype
    self._should_log = tools.Every(config.log_every)
    self._should_train = tools.Every(config.train_every)
    self._should_pretrain = tools.Once()
    self._should_reset = tools.Every(config.reset_every)
    self._should_expl = tools.Until(int(
        config.expl_until / config.action_repeat))
    self._metrics = collections.defaultdict(tf.metrics.Mean)
    with tf.device('cpu:0'):
      self._step = tf.Variable(count_steps(config.traindir), dtype=tf.int64)
    # Schedules.
    config.actor_entropy = (
        lambda x=config.actor_entropy: tools.schedule(x, self._step))
    config.actor_state_entropy = (
        lambda x=config.actor_state_entropy: tools.schedule(x, self._step))
    config.imag_gradient_mix = (
        lambda x=config.imag_gradient_mix: tools.schedule(x, self._step))
    self._dataset = iter(dataset)
    self._wm = models.WorldModel(self._step, config)
    self._task_behavior = models.ImagBehavior(
        config, self._wm, config.behavior_stop_grad)
    reward = lambda f, s, a: self._wm.heads['reward'](f).mode()
    self._expl_behavior = dict(
        greedy=lambda: self._task_behavior,
        random=lambda: expl.Random(config),
        plan2explore=lambda: expl.Plan2Explore(config, self._wm, reward),
    )[config.expl_behavior]()
    # Train step to initialize variables including optimizer statistics.
    self._train(next(self._dataset))

  def __call__(self, obs, reset, state=None, training=True):
    step = self._step.numpy().item()
    if self._should_reset(step):
      state = None
    if state is not None and reset.any():
      mask = tf.cast(1 - reset, self._float)[:, None]
      state = tf.nest.map_structure(lambda x: x * mask, state)
    if training and self._should_train(step):
      steps = (
          self._config.pretrain if self._should_pretrain()
          else self._config.train_steps)
      for _ in range(steps):
        self._train(next(self._dataset))
      if self._should_log(step):
        for name, mean in self._metrics.items():
          self._logger.scalar(name, float(mean.result()))
          mean.reset_states()
        openl_joint, openl_main, openl_disen, openl_mask = self._wm.video_pred(
            next(self._dataset))
        self._logger.video('train_openl_joint', openl_joint)
        self._logger.video('train_openl_main', openl_main)
        self._logger.video('train_openl_disen', openl_disen)
        self._logger.video('train_openl_mask', openl_mask)
        self._logger.write(fps=True)
    action, state = self._policy(obs, state, training)
    if training:
      self._step.assign_add(len(reset))
      self._logger.step = self._config.action_repeat \
          * self._step.numpy().item()
    return action, state

  @tf.function
  def _policy(self, obs, state, training):
    if state is None:
      batch_size = len(obs['image'])
      latent = self._wm.dynamics.initial(len(obs['image']))
      action = tf.zeros((batch_size, self._config.num_actions), self._float)
    else:
      latent, action = state
    embed = self._wm.encoder(self._wm.preprocess(obs))
    latent, _ = self._wm.dynamics.obs_step(
        latent, action, embed, self._config.collect_dyn_sample)
    if self._config.eval_state_mean:
      latent['stoch'] = latent['mean']
    feat = self._wm.dynamics.get_feat(latent)
    if not training:
      action = self._task_behavior.actor(feat).mode()
    elif self._should_expl(self._step):
      action = self._expl_behavior.actor(feat).sample()
    else:
      action = self._task_behavior.actor(feat).sample()
    if self._config.actor_dist == 'onehot_gumble':
      action = tf.cast(
          tf.one_hot(tf.argmax(action, axis=-1), self._config.num_actions),
          action.dtype)
    action = self._exploration(action, training)
    state = (latent, action)
    return action, state

  def _exploration(self, action, training):
    amount = self._config.expl_amount if training else self._config.eval_noise
    if amount == 0:
      return action
    amount = tf.cast(amount, self._float)
    if 'onehot' in self._config.actor_dist:
      probs = amount / self._config.num_actions + (1 - amount) * action
      return tools.OneHotDist(probs=probs).sample()
    return tf.clip_by_value(tfd.Normal(action, amount).sample(), -1, 1)

  @tf.function
  def _train(self, data):
    print('Tracing train function.')
    metrics = {}
    embed, post, feat, kl, mets = self._wm.train(data)
    metrics.update(mets)
    start = post
    if self._config.pred_discount:  # Last step could be terminal.
      start = {k: v[:, :-1] for k, v in post.items()}
      embed, feat, kl = embed[:, :-1], feat[:, :-1], kl[:, :-1]
    reward = lambda f, s, a: self._wm.heads['reward'](f).mode()
    metrics.update(self._task_behavior.train(start, reward)[-1])
    if self._config.expl_behavior != 'greedy':
      mets = self._expl_behavior.train(start, feat, embed, kl)[-1]
      metrics.update({'expl_' + key: value for key, value in mets.items()})
    for name, value in metrics.items():
      self._metrics[name].update_state(value)

def count_steps(folder):
  return sum(int(str(n).split('-')[-1][:-4]) - 1 for n in folder.glob('*.npz'))


def make_dataset(episodes, config):
  example = episodes[next(iter(episodes.keys()))]
  types = {k: v.dtype for k, v in example.items()}
  shapes = {k: (None,) + v.shape[1:] for k, v in example.items()}
  generator = lambda: tools.sample_episodes(
      episodes, config.batch_length, config.oversample_ends)
  dataset = tf.data.Dataset.from_generator(generator, types, shapes)
  dataset = dataset.batch(config.batch_size, drop_remainder=True)
  dataset = dataset.prefetch(10)
  return dataset

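A sketch of what the pipeline yields: batches of whole subsequences, keyed like the stored episodes (shapes assumed for the default DMC config):

dataset = make_dataset(train_eps, config)
batch = next(iter(dataset))
print(batch['image'].shape)  # (50, 50, 64, 64, 3): [batch, length, h, w, c]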
def make_env(config, logger, mode, train_eps, eval_eps):
  suite, task = config.task.split('_', 1)
  if suite == 'dmc':
    env = wrappers.DeepMindControl(task, config.action_repeat, config.size)
    env = wrappers.NormalizeActions(env)
  elif suite == 'atari':
    env = wrappers.Atari(
        task, config.action_repeat, config.size,
        grayscale=config.atari_grayscale,
        life_done=False and (mode == 'train'),
        sticky_actions=True,
        all_actions=True)
    env = wrappers.OneHotAction(env)
  else:
    raise NotImplementedError(suite)
  env = wrappers.TimeLimit(env, config.time_limit)
  callbacks = [functools.partial(
      process_episode, config, logger, mode, train_eps, eval_eps)]
  env = wrappers.CollectDataset(env, callbacks)
  env = wrappers.RewardObs(env)
  return env

def process_episode(config, logger, mode, train_eps, eval_eps, episode):
  directory = dict(train=config.traindir, eval=config.evaldir)[mode]
  cache = dict(train=train_eps, eval=eval_eps)[mode]
  filename = tools.save_episodes(directory, [episode])[0]
  length = len(episode['reward']) - 1
  score = float(episode['reward'].astype(np.float64).sum())
  video = episode['image']
  if mode == 'eval':
    cache.clear()
  if mode == 'train' and config.dataset_size:
    total = 0
    for key, ep in reversed(sorted(cache.items(), key=lambda x: x[0])):
      if total <= config.dataset_size - length:
        total += len(ep['reward']) - 1
      else:
        del cache[key]
    logger.scalar('dataset_size', total + length)
  cache[str(filename)] = episode
  print(f'{mode.title()} episode has {length} steps and return {score:.1f}.')
  logger.scalar(f'{mode}_return', score)
  logger.scalar(f'{mode}_length', length)
  logger.scalar(f'{mode}_episodes', len(cache))
  if mode == 'eval' or config.expl_gifs:
    logger.video(f'{mode}_policy', video[None])
  logger.write()

def main(logdir, config):

  logdir = os.path.join(
      logdir, config.task, 'Ours', str(config.seed))

  logdir = pathlib.Path(logdir).expanduser()
  config.traindir = config.traindir or logdir / 'train_eps'
  config.evaldir = config.evaldir or logdir / 'eval_eps'
  config.steps //= config.action_repeat
  config.eval_every //= config.action_repeat
  config.log_every //= config.action_repeat
  config.time_limit //= config.action_repeat
  config.act = getattr(tf.nn, config.act)

  if config.debug:
    tf.config.experimental_run_functions_eagerly(True)
  if config.gpu_growth:
    message = 'No GPU found. To actually train on CPU remove this assert.'
    assert tf.config.experimental.list_physical_devices('GPU'), message
    for gpu in tf.config.experimental.list_physical_devices('GPU'):
      tf.config.experimental.set_memory_growth(gpu, True)
  assert config.precision in (16, 32), config.precision
  if config.precision == 16:
    prec.set_policy(prec.Policy('mixed_float16'))
  print('Logdir', logdir)
  logdir.mkdir(parents=True, exist_ok=True)
  config.traindir.mkdir(parents=True, exist_ok=True)
  config.evaldir.mkdir(parents=True, exist_ok=True)
  step = count_steps(config.traindir)
  logger = tools.Logger(logdir, config.action_repeat * step)

  print('Create envs.')
  if config.offline_traindir:
    directory = config.offline_traindir.format(**vars(config))
  else:
    directory = config.traindir
  train_eps = tools.load_episodes(directory, limit=config.dataset_size)
  if config.offline_evaldir:
    directory = config.offline_evaldir.format(**vars(config))
  else:
    directory = config.evaldir
  eval_eps = tools.load_episodes(directory, limit=1)
  make = lambda mode: make_env(config, logger, mode, train_eps, eval_eps)
  train_envs = [make('train') for _ in range(config.envs)]
  eval_envs = [make('eval') for _ in range(config.envs)]
  acts = train_envs[0].action_space
  config.num_actions = acts.n if hasattr(acts, 'n') else acts.shape[0]

  prefill = max(0, config.prefill - count_steps(config.traindir))
  print(f'Prefill dataset ({prefill} steps).')
  random_agent = lambda o, d, s: ([acts.sample() for _ in d], s)
  tools.simulate(random_agent, train_envs, prefill)
  tools.simulate(random_agent, eval_envs, episodes=1)
  logger.step = config.action_repeat * count_steps(config.traindir)

  print('Simulate agent.')
  train_dataset = make_dataset(train_eps, config)
  eval_dataset = iter(make_dataset(eval_eps, config))
  agent = Dreamer(config, logger, train_dataset)
  if (logdir / 'variables.pkl').exists():
    agent.load(logdir / 'variables.pkl')
    agent._should_pretrain._once = False

  state = None
  suite, task = config.task.split('_', 1)
  num_eval_episodes = 10 if suite == 'procgen' else 1
  while agent._step.numpy().item() < config.steps:
    logger.write()
    print('Start evaluation.')
    openl_joint, openl_main, openl_disen, openl_mask = agent._wm.video_pred(
        next(eval_dataset))
    logger.video('eval_openl_joint', openl_joint)
    logger.video('eval_openl_main', openl_main)
    logger.video('eval_openl_disen', openl_disen)
    logger.video('eval_openl_mask', openl_mask)
    eval_policy = functools.partial(agent, training=False)
    tools.simulate(eval_policy, eval_envs, episodes=num_eval_episodes)
    print('Start training.')
    state = tools.simulate(agent, train_envs, config.eval_every, state=state)
    agent.save(logdir / 'variables.pkl')
  for env in train_envs + eval_envs:
    try:
      env.close()
    except Exception:
      pass

if __name__ == '__main__':
  parser = argparse.ArgumentParser()
  parser.add_argument('--configs', nargs='+', required=True)
  args, remaining = parser.parse_known_args()
  configs = yaml.safe_load(
      (pathlib.Path(__file__).parent / 'configs.yaml').read_text())
  config_ = {}
  for name in args.configs:
    config_.update(configs[name])
  parser = argparse.ArgumentParser()
  for key, value in config_.items():
    arg_type = tools.args_type(value)
    parser.add_argument(f'--{key}', type=arg_type, default=arg_type(value))
  main(config_['logdir'], parser.parse_args(remaining))
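Given this flag scheme, a typical invocation would presumably look like the following, with later config names overriding earlier ones and any generated flag available for override:

python DreamerV2/dreamer.py --configs defaults atari
python DreamerV2/dreamer.py --configs defaults atari debug --seed 1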
83
DreamerV2/exploration.py
Normal file
@ -0,0 +1,83 @@
import tensorflow as tf
from tensorflow.keras.mixed_precision import experimental as prec
from tensorflow_probability import distributions as tfd

import models
import networks
import tools


class Random(tools.Module):

  def __init__(self, config):
    self._config = config
    self._float = prec.global_policy().compute_dtype

  def actor(self, feat):
    shape = feat.shape[:-1] + [self._config.num_actions]
    if self._config.actor_dist == 'onehot':
      return tools.OneHotDist(tf.zeros(shape))
    else:
      ones = tf.ones(shape, self._float)
      return tfd.Uniform(-ones, ones)

  def train(self, start, feat, embed, kl):
    return None, {}

class Plan2Explore(tools.Module):

  def __init__(self, config, world_model, reward=None):
    self._config = config
    self._reward = reward
    self._behavior = models.ImagBehavior(config, world_model)
    self.actor = self._behavior.actor
    size = {
        'embed': 32 * config.cnn_depth,
        'stoch': config.dyn_stoch,
        'deter': config.dyn_deter,
        'feat': config.dyn_stoch + config.dyn_deter,
    }[self._config.disag_target]
    kw = dict(
        shape=size, layers=config.disag_layers, units=config.disag_units,
        act=config.act)
    self._networks = [
        networks.DenseHead(**kw) for _ in range(config.disag_models)]
    self._opt = tools.Optimizer(
        'ensemble', config.model_lr, config.opt_eps, config.grad_clip,
        config.weight_decay, opt=config.opt)

  def train(self, start, feat, embed, kl):
    metrics = {}
    target = {
        'embed': embed,
        'stoch': start['stoch'],
        'deter': start['deter'],
        'feat': feat,
    }[self._config.disag_target]
    metrics.update(self._train_ensemble(feat, target))
    metrics.update(self._behavior.train(start, self._intrinsic_reward)[-1])
    return None, metrics

  def _intrinsic_reward(self, feat, state, action):
    preds = [head(feat, tf.float32).mean() for head in self._networks]
    disag = tf.reduce_mean(tf.math.reduce_std(preds, 0), -1)
    if self._config.disag_log:
      disag = tf.math.log(disag)
    reward = self._config.expl_intr_scale * disag
    if self._config.expl_extr_scale:
      reward += tf.cast(self._config.expl_extr_scale * self._reward(
          feat, state, action), tf.float32)
    return reward

  def _train_ensemble(self, inputs, targets):
    if self._config.disag_offset:
      targets = targets[:, self._config.disag_offset:]
      inputs = inputs[:, :-self._config.disag_offset]
    targets = tf.stop_gradient(targets)
    inputs = tf.stop_gradient(inputs)
    with tf.GradientTape() as tape:
      preds = [head(inputs) for head in self._networks]
      likes = [tf.reduce_mean(pred.log_prob(targets)) for pred in preds]
      loss = -tf.cast(tf.reduce_sum(likes), tf.float32)
    metrics = self._opt(tape, loss, self._networks)
    return metrics
429
DreamerV2/models.py
Normal file
@ -0,0 +1,429 @@
import tensorflow as tf
from tensorflow.keras.mixed_precision import experimental as prec

import networks
import tools


class WorldModel(tools.Module):

  def __init__(self, step, config):
    self._step = step
    self._config = config
    channels = (1 if config.atari_grayscale else 3)
    shape = config.size + (channels,)

    ########
    # Main #
    ########
    self.encoder = networks.ConvEncoder(
        config.cnn_depth, config.act, config.encoder_kernels)
    self.dynamics = networks.RSSM(
        config.dyn_stoch, config.dyn_deter, config.dyn_hidden,
        config.dyn_input_layers, config.dyn_output_layers, config.dyn_shared,
        config.dyn_discrete, config.act, config.dyn_mean_act,
        config.dyn_std_act, config.dyn_min_std, config.dyn_cell)
    self.heads = {}
    self.heads['reward'] = networks.DenseHead(
        [], config.reward_layers, config.units, config.act)
    if config.pred_discount:
      self.heads['discount'] = networks.DenseHead(
          [], config.discount_layers, config.units, config.act, dist='binary')
    self._model_opt = tools.Optimizer(
        'model', config.model_lr, config.opt_eps, config.grad_clip,
        config.weight_decay, opt=config.opt)
    self._scales = dict(
        reward=config.reward_scale, discount=config.discount_scale)

    #########
    # Disen #
    #########
    self.disen_encoder = networks.ConvEncoder(
        config.disen_cnn_depth, config.act, config.encoder_kernels)
    self.disen_dynamics = networks.RSSM(
        config.dyn_stoch, config.dyn_deter, config.dyn_hidden,
        config.dyn_input_layers, config.dyn_output_layers, config.dyn_shared,
        config.dyn_discrete, config.act, config.dyn_mean_act,
        config.dyn_std_act, config.dyn_min_std, config.dyn_cell)

    self.disen_heads = {}
    self.disen_heads['reward'] = networks.DenseHead(
        [], config.reward_layers, config.units, config.act)
    if config.pred_discount:
      self.disen_heads['discount'] = networks.DenseHead(
          [], config.discount_layers, config.units, config.act, dist='binary')

    self._disen_model_opt = tools.Optimizer(
        'disen', config.model_lr, config.opt_eps, config.grad_clip,
        config.weight_decay, opt=config.opt)

    self._disen_heads_opt = {}
    self._disen_heads_opt['reward'] = tools.Optimizer(
        'disen_reward', config.model_lr, config.opt_eps, config.grad_clip,
        config.weight_decay, opt=config.opt)
    if config.pred_discount:
      self._disen_heads_opt['discount'] = tools.Optimizer(
          'disen_pcont', config.model_lr, config.opt_eps, config.grad_clip,
          config.weight_decay, opt=config.opt)

    # Negative signs for the reward/discount scales here: the disen
    # (distractor) model is pushed away from reward/discount information.
    self._disen_scales = dict(
        disen_only=config.disen_only_scale,
        reward=-config.disen_reward_scale,
        discount=-config.disen_discount_scale)

    self.disen_only_image_head = networks.ConvDecoder(
        config.disen_cnn_depth, config.act, shape, config.decoder_kernels,
        config.decoder_thin)

    ################
    # Joint Decode #
    ################
    self.image_head = networks.ConvDecoderMask(
        config.cnn_depth, config.act, shape, config.decoder_kernels,
        config.decoder_thin)
    self.disen_image_head = networks.ConvDecoderMask(
        config.disen_cnn_depth, config.act, shape, config.decoder_kernels,
        config.decoder_thin)
    self.joint_image_head = networks.ConvDecoderMaskEnsemble(
        self.image_head, self.disen_image_head)

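The joint decoder (`ConvDecoderMaskEnsemble`, defined in networks.py and not shown in this excerpt) presumably composites the task and distractor reconstructions per pixel through predicted masks; a toy sketch of that kind of blend, with illustrative names only, not the actual API:

def blend(main_img, disen_img, mask_logits):
  # Per-pixel gate in [0, 1] choosing between the two decoders' outputs.
  mask = tf.sigmoid(mask_logits)
  return mask * main_img + (1 - mask) * disen_img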
def train(self, data):
|
||||||
|
data = self.preprocess(data)
|
||||||
|
with tf.GradientTape() as model_tape, tf.GradientTape() as disen_tape:
|
||||||
|
|
||||||
|
# kl schedule
|
||||||
|
kl_balance = tools.schedule(self._config.kl_balance, self._step)
|
||||||
|
kl_free = tools.schedule(self._config.kl_free, self._step)
|
||||||
|
kl_scale = tools.schedule(self._config.kl_scale, self._step)
|
||||||
|
|
||||||
|
# Main
|
||||||
|
embed = self.encoder(data)
|
||||||
|
post, prior = self.dynamics.observe(embed, data['action'])
|
||||||
|
kl_loss, kl_value = self.dynamics.kl_loss(
|
||||||
|
post, prior, kl_balance, kl_free, kl_scale)
|
||||||
|
feat = self.dynamics.get_feat(post)
|
||||||
|
likes = {}
|
||||||
|
for name, head in self.heads.items():
|
||||||
|
grad_head = (name in self._config.grad_heads)
|
||||||
|
inp = feat if grad_head else tf.stop_gradient(feat)
|
||||||
|
pred = head(inp, tf.float32)
|
||||||
|
like = pred.log_prob(tf.cast(data[name], tf.float32))
|
||||||
|
likes[name] = tf.reduce_mean(
|
||||||
|
like) * self._scales.get(name, 1.0)
|
||||||
|
|
||||||
|
# Disen
|
||||||
|
embed_disen = self.disen_encoder(data)
|
||||||
|
post_disen, prior_disen = self.disen_dynamics.observe(
|
||||||
|
embed_disen, data['action'])
|
||||||
|
kl_loss_disen, kl_value_disen = self.dynamics.kl_loss(
|
||||||
|
post_disen, prior_disen, kl_balance, kl_free, kl_scale)
|
||||||
|
feat_disen = self.disen_dynamics.get_feat(post_disen)
|
||||||
|
|
||||||
|
      # Optimize disen reward/pcont till optimal
      disen_metrics = dict(reward={}, discount={})
      loss_disen = dict(reward=None, discount=None)
      for _ in range(self._config.num_reward_opt_iters):
        with tf.GradientTape() as disen_reward_tape, tf.GradientTape() as disen_pcont_tape:
          disen_gradient_tapes = dict(
              reward=disen_reward_tape, discount=disen_pcont_tape)
          for name, head in self.disen_heads.items():
            pred_disen = head(
                tf.stop_gradient(feat_disen), tf.float32)
            loss_disen[name] = -tf.reduce_mean(pred_disen.log_prob(
                tf.cast(data[name], tf.float32)))
        for name, head in self.disen_heads.items():
          disen_metrics[name] = self._disen_heads_opt[name](
              disen_gradient_tapes[name], loss_disen[name], [head],
              prefix='disen_neg')

      # Compute likes for disen model (including negative gradients)
      likes_disen = {}
      for name, head in self.disen_heads.items():
        pred_disen = head(feat_disen, tf.float32)
        like_disen = pred_disen.log_prob(
            tf.cast(data[name], tf.float32))
        likes_disen[name] = tf.reduce_mean(
            like_disen) * self._disen_scales.get(name, -1.0)
      disen_only_image_pred = self.disen_only_image_head(
          feat_disen, tf.float32)
      disen_only_image_like = tf.reduce_mean(disen_only_image_pred.log_prob(
          tf.cast(data['image'], tf.float32))) * self._disen_scales.get(
              'disen_only', 1.0)
      likes_disen['disen_only'] = disen_only_image_like

      # Joint decode
      image_pred_joint, _, _, _ = self.joint_image_head(
          feat, feat_disen, tf.float32)
      image_like = tf.reduce_mean(image_pred_joint.log_prob(
          tf.cast(data['image'], tf.float32)))
      likes['image'] = image_like
      likes_disen['image'] = image_like

      # Compute loss
      model_loss = kl_loss - sum(likes.values())
      disen_loss = kl_loss_disen - sum(likes_disen.values())

    model_parts = [self.encoder, self.dynamics,
                   self.joint_image_head] + list(self.heads.values())
    disen_parts = [self.disen_encoder, self.disen_dynamics,
                   self.joint_image_head, self.disen_only_image_head]

    metrics = self._model_opt(
        model_tape, model_loss, model_parts, prefix='main')
    disen_model_metrics = self._disen_model_opt(
        disen_tape, disen_loss, disen_parts, prefix='disen')

    metrics['kl_balance'] = kl_balance
    metrics['kl_free'] = kl_free
    metrics['kl_scale'] = kl_scale
    metrics.update({f'{name}_loss': -like for name, like in likes.items()})

    metrics['disen/disen_only_image_loss'] = -disen_only_image_like
    metrics['disen/disen_reward_loss'] = -likes_disen['reward'] / \
        self._disen_scales.get('reward', -1.0)
    if 'discount' in likes_disen:
      metrics['disen/disen_discount_loss'] = -likes_disen['discount'] / \
          self._disen_scales.get('discount', -1.0)

    metrics['kl'] = tf.reduce_mean(kl_value)
    metrics['prior_ent'] = self.dynamics.get_dist(prior).entropy()
    metrics['post_ent'] = self.dynamics.get_dist(post).entropy()
    metrics['disen/kl'] = tf.reduce_mean(kl_value_disen)
    metrics['disen/prior_ent'] = self.disen_dynamics.get_dist(
        prior_disen).entropy()
    metrics['disen/post_ent'] = self.disen_dynamics.get_dist(
        post_disen).entropy()

    metrics.update(disen_metrics['reward'])
    metrics.update(disen_metrics['discount'])
    metrics.update(disen_model_metrics)

    return embed, post, feat, kl_value, metrics

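  # Usage sketch (an illustration; the shapes are assumptions, not from this
  # file): `data` is a replay batch such as
  #   {'image': [B, T, 64, 64, 3], 'action': [B, T, A], 'reward': [B, T]}
  # and `embed, post, feat, kl_value, metrics = model.train(data)` returns
  # the posterior states that ImagBehavior.train later uses as start states.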
  @tf.function
  def preprocess(self, obs):
    dtype = prec.global_policy().compute_dtype
    obs = obs.copy()
    obs['image'] = tf.cast(obs['image'], dtype) / 255.0 - 0.5
    obs['reward'] = getattr(tf, self._config.clip_rewards)(obs['reward'])
    if 'discount' in obs:
      obs['discount'] *= self._config.discount
    for key, value in obs.items():
      if tf.dtypes.as_dtype(value.dtype) in (
          tf.float16, tf.float32, tf.float64):
        obs[key] = tf.cast(value, dtype)
    return obs

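  # The video grids below stack (truth, model, error) along the height axis
  # for six sequences: the first 5 frames are posterior reconstructions, the
  # rest are open-loop predictions rolled forward from imagined states only.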
  @tf.function
  def video_pred(self, data):
    data = self.preprocess(data)
    truth = data['image'][:6] + 0.5

    embed = self.encoder(data)
    embed_disen = self.disen_encoder(data)
    states, _ = self.dynamics.observe(
        embed[:6, :5], data['action'][:6, :5])
    states_disen, _ = self.disen_dynamics.observe(
        embed_disen[:6, :5], data['action'][:6, :5])
    feats = self.dynamics.get_feat(states)
    feats_disen = self.disen_dynamics.get_feat(states_disen)
    recon_joint, recon_main, recon_disen, recon_mask = self.joint_image_head(
        feats, feats_disen)
    recon_joint = recon_joint.mode()[:6]
    recon_main = recon_main.mode()[:6]
    recon_disen = recon_disen.mode()[:6]
    recon_mask = recon_mask[:6]

    init = {k: v[:, -1] for k, v in states.items()}
    init_disen = {k: v[:, -1] for k, v in states_disen.items()}
    prior = self.dynamics.imagine(data['action'][:6, 5:], init)
    prior_disen = self.disen_dynamics.imagine(
        data['action'][:6, 5:], init_disen)
    _feats = self.dynamics.get_feat(prior)
    _feats_disen = self.disen_dynamics.get_feat(prior_disen)
    openl_joint, openl_main, openl_disen, openl_mask = self.joint_image_head(
        _feats, _feats_disen)
    openl_joint = openl_joint.mode()
    openl_main = openl_main.mode()
    openl_disen = openl_disen.mode()

    model_joint = tf.concat(
        [recon_joint[:, :5] + 0.5, openl_joint + 0.5], 1)
    error_joint = (model_joint - truth + 1) / 2
    model_main = tf.concat(
        [recon_main[:, :5] + 0.5, openl_main + 0.5], 1)
    error_main = (model_main - truth + 1) / 2
    model_disen = tf.concat(
        [recon_disen[:, :5] + 0.5, openl_disen + 0.5], 1)
    error_disen = (model_disen - truth + 1) / 2
    model_mask = tf.concat(
        [recon_mask[:, :5] + 0.5, openl_mask + 0.5], 1)

    output_joint = tf.concat([truth, model_joint, error_joint], 2)
    output_main = tf.concat([truth, model_main, error_main], 2)
    output_disen = tf.concat([truth, model_disen, error_disen], 2)
    output_mask = model_mask

    return output_joint, output_main, output_disen, output_mask


class ImagBehavior(tools.Module):

  def __init__(self, config, world_model, stop_grad_actor=True, reward=None):
    self._config = config
    self._world_model = world_model
    self._stop_grad_actor = stop_grad_actor
    self._reward = reward
    # The eighth positional argument lands in ActionHead's `action_disc`
    # slot, which is stored but never read for the distributions used here.
    self.actor = networks.ActionHead(
        config.num_actions, config.actor_layers, config.units, config.act,
        config.actor_dist, config.actor_init_std, config.actor_min_std,
        config.actor_dist, config.actor_temp, config.actor_outscale)
    self.value = networks.DenseHead(
        [], config.value_layers, config.units, config.act,
        config.value_head)
    if config.slow_value_target or config.slow_actor_target:
      self._slow_value = networks.DenseHead(
          [], config.value_layers, config.units, config.act)
      # dtype must be passed by keyword; the second positional argument of
      # tf.Variable is `trainable`, not the dtype.
      self._updates = tf.Variable(0, dtype=tf.int64)
    kw = dict(wd=config.weight_decay, opt=config.opt)
    self._actor_opt = tools.Optimizer(
        'actor', config.actor_lr, config.opt_eps, config.actor_grad_clip,
        **kw)
    self._value_opt = tools.Optimizer(
        'value', config.value_lr, config.opt_eps, config.value_grad_clip,
        **kw)

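  # Usage sketch (illustrative; `world_model`, `post`, and `reward_fn` are
  # assumed to come from the surrounding training loop):
  #   behavior = ImagBehavior(config, world_model, reward=reward_fn)
  #   start = {k: v for k, v in post.items()}
  #   feat, state, action, weights, mets = behavior.train(start)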
  def train(
      self, start, objective=None, imagine=None, tape=None, repeats=None):
    objective = objective or self._reward
    self._update_slow_target()
    metrics = {}
    with (tape or tf.GradientTape()) as actor_tape:
      assert bool(objective) != bool(imagine)
      if objective:
        imag_feat, imag_state, imag_action = self._imagine(
            start, self.actor, self._config.imag_horizon, repeats)
        reward = objective(imag_feat, imag_state, imag_action)
      else:
        imag_feat, imag_state, imag_action, reward = imagine(start)
      actor_ent = self.actor(imag_feat, tf.float32).entropy()
      state_ent = self._world_model.dynamics.get_dist(
          imag_state, tf.float32).entropy()
      target, weights = self._compute_target(
          imag_feat, reward, actor_ent, state_ent,
          self._config.slow_actor_target)
      actor_loss, mets = self._compute_actor_loss(
          imag_feat, imag_state, imag_action, target, actor_ent, state_ent,
          weights)
      metrics.update(mets)
    if self._config.slow_value_target != self._config.slow_actor_target:
      target, weights = self._compute_target(
          imag_feat, reward, actor_ent, state_ent,
          self._config.slow_value_target)
    with tf.GradientTape() as value_tape:
      value = self.value(imag_feat, tf.float32)[:-1]
      value_loss = -value.log_prob(tf.stop_gradient(target))
      if self._config.value_decay:
        value_loss += self._config.value_decay * value.mode()
      value_loss = tf.reduce_mean(weights[:-1] * value_loss)
    metrics['reward_mean'] = tf.reduce_mean(reward)
    metrics['reward_std'] = tf.math.reduce_std(reward)
    metrics['actor_ent'] = tf.reduce_mean(actor_ent)
    metrics.update(self._actor_opt(actor_tape, actor_loss, [self.actor]))
    metrics.update(self._value_opt(value_tape, value_loss, [self.value]))
    return imag_feat, imag_state, imag_action, weights, metrics

  def _imagine(self, start, policy, horizon, repeats=None):
    dynamics = self._world_model.dynamics
    if repeats:
      start = {k: tf.repeat(v, repeats, axis=1) for k, v in start.items()}

    def flatten(x):
      return tf.reshape(x, [-1] + list(x.shape[2:]))
    start = {k: flatten(v) for k, v in start.items()}

    def step(prev, _):
      state, _, _ = prev
      feat = dynamics.get_feat(state)
      inp = tf.stop_gradient(feat) if self._stop_grad_actor else feat
      action = policy(inp).sample()
      succ = dynamics.img_step(
          state, action, sample=self._config.imag_sample)
      return succ, feat, action
    feat = 0 * dynamics.get_feat(start)
    action = policy(feat).mode()
    succ, feats, actions = tools.static_scan(
        step, tf.range(horizon), (start, feat, action))
    states = {k: tf.concat([
        start[k][None], v[:-1]], 0) for k, v in succ.items()}
    if repeats:
      def unfold(tensor):
        s = tensor.shape
        return tf.reshape(tensor, [s[0], s[1] // repeats, repeats] + s[2:])
      states, feats, actions = tf.nest.map_structure(
          unfold, (states, feats, actions))
    return feats, states, actions

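  # `weights` below is the cumulative product of predicted discounts,
  # shifted by one step, so imagined steps after a predicted episode end
  # contribute less to both the actor and value losses. The lambda-return
  # itself is computed in tools.lambda_return (see the note above that
  # function).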
  def _compute_target(self, imag_feat, reward, actor_ent, state_ent, slow):
    reward = tf.cast(reward, tf.float32)
    if 'discount' in self._world_model.heads:
      discount = self._world_model.heads['discount'](
          imag_feat, tf.float32).mean()
    else:
      discount = self._config.discount * tf.ones_like(reward)
    if self._config.future_entropy and tf.greater(
        self._config.actor_entropy(), 0):
      reward += self._config.actor_entropy() * actor_ent
    if self._config.future_entropy and tf.greater(
        self._config.actor_state_entropy(), 0):
      reward += self._config.actor_state_entropy() * state_ent
    if slow:
      value = self._slow_value(imag_feat, tf.float32).mode()
    else:
      value = self.value(imag_feat, tf.float32).mode()
    target = tools.lambda_return(
        reward[:-1], value[:-1], discount[:-1],
        bootstrap=value[-1], lambda_=self._config.discount_lambda, axis=0)
    weights = tf.stop_gradient(tf.math.cumprod(tf.concat(
        [tf.ones_like(discount[:1]), discount[:-1]], 0), 0))
    return target, weights

  def _compute_actor_loss(
      self, imag_feat, imag_state, imag_action, target, actor_ent, state_ent,
      weights):
    metrics = {}
    inp = tf.stop_gradient(imag_feat) if self._stop_grad_actor else imag_feat
    policy = self.actor(inp, tf.float32)
    actor_ent = policy.entropy()
    if self._config.imag_gradient == 'dynamics':
      actor_target = target
    elif self._config.imag_gradient == 'reinforce':
      imag_action = tf.cast(imag_action, tf.float32)
      actor_target = policy.log_prob(imag_action)[:-1] * tf.stop_gradient(
          target - self.value(imag_feat[:-1], tf.float32).mode())
    elif self._config.imag_gradient == 'both':
      imag_action = tf.cast(imag_action, tf.float32)
      actor_target = policy.log_prob(imag_action)[:-1] * tf.stop_gradient(
          target - self.value(imag_feat[:-1], tf.float32).mode())
      mix = self._config.imag_gradient_mix()
      actor_target = mix * target + (1 - mix) * actor_target
      metrics['imag_gradient_mix'] = mix
    else:
      raise NotImplementedError(self._config.imag_gradient)
    if not self._config.future_entropy and tf.greater(
        self._config.actor_entropy(), 0):
      actor_target += self._config.actor_entropy() * actor_ent[:-1]
    if not self._config.future_entropy and tf.greater(
        self._config.actor_state_entropy(), 0):
      actor_target += self._config.actor_state_entropy() * state_ent[:-1]
    actor_loss = -tf.reduce_mean(weights[:-1] * actor_target)
    return actor_loss, metrics

  def _update_slow_target(self):
    if self._config.slow_value_target or self._config.slow_actor_target:
      if self._updates % self._config.slow_target_update == 0:
        mix = self._config.slow_target_fraction
        for s, d in zip(self.value.variables, self._slow_value.variables):
          d.assign(mix * s + (1 - mix) * d)
      self._updates.assign_add(1)
465
DreamerV2/networks.py
Normal file
@ -0,0 +1,465 @@
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers as tfkl
from tensorflow_probability import distributions as tfd
from tensorflow.keras.mixed_precision import experimental as prec

import tools

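# State layout used throughout RSSM (a reading of the code below): a state
# is a dict with 'deter' (the GRU hidden state) plus either 'mean'/'std'/
# 'stoch' in the continuous case or 'logit'/'stoch' in the discrete case.
# get_feat concatenates the (flattened) stochastic sample with 'deter'.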
class RSSM(tools.Module):

  def __init__(
      self, stoch=30, deter=200, hidden=200, layers_input=1, layers_output=1,
      shared=False, discrete=False, act=tf.nn.elu, mean_act='none',
      std_act='softplus', min_std=0.1, cell='keras'):
    super().__init__()
    self._stoch = stoch
    self._deter = deter
    self._hidden = hidden
    self._min_std = min_std
    self._layers_input = layers_input
    self._layers_output = layers_output
    self._shared = shared
    self._discrete = discrete
    self._act = act
    self._mean_act = mean_act
    self._std_act = std_act
    self._embed = None
    # Only 'gru' and 'gru_layer_norm' cells are implemented, so the 'keras'
    # default must be overridden by the caller.
    if cell == 'gru':
      self._cell = tfkl.GRUCell(self._deter)
    elif cell == 'gru_layer_norm':
      self._cell = GRUCell(self._deter, norm=True)
    else:
      raise NotImplementedError(cell)

  def initial(self, batch_size):
    dtype = prec.global_policy().compute_dtype
    if self._discrete:
      state = dict(
          logit=tf.zeros(
              [batch_size, self._stoch, self._discrete], dtype),
          stoch=tf.zeros(
              [batch_size, self._stoch, self._discrete], dtype),
          deter=self._cell.get_initial_state(None, batch_size, dtype))
    else:
      state = dict(
          mean=tf.zeros([batch_size, self._stoch], dtype),
          std=tf.zeros([batch_size, self._stoch], dtype),
          stoch=tf.zeros([batch_size, self._stoch], dtype),
          deter=self._cell.get_initial_state(None, batch_size, dtype))
    return state

  @tf.function
  def observe(self, embed, action, state=None):
    def swap(x):
      return tf.transpose(x, [1, 0] + list(range(2, len(x.shape))))
    if state is None:
      state = self.initial(tf.shape(action)[0])
    embed, action = swap(embed), swap(action)
    post, prior = tools.static_scan(
        lambda prev, inputs: self.obs_step(prev[0], *inputs),
        (action, embed), (state, state))
    post = {k: swap(v) for k, v in post.items()}
    prior = {k: swap(v) for k, v in prior.items()}
    return post, prior

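  # Both observe() and imagine() take batch-major [batch, time, ...] inputs,
  # swap to time-major for tools.static_scan, and swap back before
  # returning; imagine() rolls the prior forward from actions alone, with no
  # observations.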
  @tf.function
  def imagine(self, action, state=None):
    def swap(x):
      return tf.transpose(x, [1, 0] + list(range(2, len(x.shape))))
    if state is None:
      state = self.initial(tf.shape(action)[0])
    assert isinstance(state, dict), state
    action = swap(action)
    prior = tools.static_scan(self.img_step, action, state)
    prior = {k: swap(v) for k, v in prior.items()}
    return prior

  def get_feat(self, state):
    stoch = state['stoch']
    if self._discrete:
      shape = stoch.shape[:-2] + [self._stoch * self._discrete]
      stoch = tf.reshape(stoch, shape)
    return tf.concat([stoch, state['deter']], -1)

  def get_dist(self, state, dtype=None):
    if self._discrete:
      logit = state['logit']
      logit = tf.cast(logit, tf.float32)
      dist = tfd.Independent(tools.OneHotDist(logit), 1)
      if dtype != tf.float32:
        dist = tools.DtypeDist(dist, dtype or state['logit'].dtype)
    else:
      mean, std = state['mean'], state['std']
      if dtype:
        mean = tf.cast(mean, dtype)
        std = tf.cast(std, dtype)
      dist = tfd.MultivariateNormalDiag(mean, std)
    return dist

  @tf.function
  def obs_step(self, prev_state, prev_action, embed, sample=True):
    if not self._embed:
      self._embed = embed.shape[-1]
    prior = self.img_step(prev_state, prev_action, None, sample)
    if self._shared:
      post = self.img_step(prev_state, prev_action, embed, sample)
    else:
      x = tf.concat([prior['deter'], embed], -1)
      for i in range(self._layers_output):
        x = self.get(f'obi{i}', tfkl.Dense, self._hidden, self._act)(x)
      stats = self._suff_stats_layer('obs', x)
      if sample:
        stoch = self.get_dist(stats).sample()
      else:
        stoch = self.get_dist(stats).mode()
      post = {'stoch': stoch, 'deter': prior['deter'], **stats}
    return post, prior

  @tf.function
  def img_step(self, prev_state, prev_action, embed=None, sample=True):
    prev_stoch = prev_state['stoch']
    if self._discrete:
      shape = prev_stoch.shape[:-2] + [self._stoch * self._discrete]
      prev_stoch = tf.reshape(prev_stoch, shape)
    if self._shared:
      if embed is None:
        shape = prev_action.shape[:-1] + [self._embed]
        embed = tf.zeros(shape, prev_action.dtype)
      x = tf.concat([prev_stoch, prev_action, embed], -1)
    else:
      x = tf.concat([prev_stoch, prev_action], -1)
    for i in range(self._layers_input):
      x = self.get(f'ini{i}', tfkl.Dense, self._hidden, self._act)(x)
    x, deter = self._cell(x, [prev_state['deter']])
    deter = deter[0]  # Keras wraps the state in a list.
    for i in range(self._layers_output):
      x = self.get(f'imo{i}', tfkl.Dense, self._hidden, self._act)(x)
    stats = self._suff_stats_layer('ims', x)
    if sample:
      stoch = self.get_dist(stats).sample()
    else:
      stoch = self.get_dist(stats).mode()
    prior = {'stoch': stoch, 'deter': deter, **stats}
    return prior

  def _suff_stats_layer(self, name, x):
    if self._discrete:
      x = self.get(name, tfkl.Dense, self._stoch * self._discrete, None)(x)
      logit = tf.reshape(x, x.shape[:-1] + [self._stoch, self._discrete])
      return {'logit': logit}
    else:
      x = self.get(name, tfkl.Dense, 2 * self._stoch, None)(x)
      mean, std = tf.split(x, 2, -1)
      mean = {
          'none': lambda: mean,
          'tanh5': lambda: 5.0 * tf.math.tanh(mean / 5.0),
      }[self._mean_act]()
      std = {
          'softplus': lambda: tf.nn.softplus(std),
          'abs': lambda: tf.math.abs(std + 1),
          'sigmoid': lambda: tf.nn.sigmoid(std),
          'sigmoid2': lambda: 2 * tf.nn.sigmoid(std / 2),
      }[self._std_act]()
      std = std + self._min_std
      return {'mean': mean, 'std': std}

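  # KL balancing, restating the code below with b = balance and sg() for
  # stop_gradient:
  #   loss = scale * (b * max(E[KL(prior || sg(post))], free)
  #                   + (1 - b) * max(E[KL(sg(prior) || post)], free))
  # so prior and posterior are pulled toward each other at different rates.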
  def kl_loss(self, post, prior, balance, free, scale):
    kld = tfd.kl_divergence

    def dist(x):
      return self.get_dist(x, tf.float32)
    if balance == 0.5:
      value = kld(dist(prior), dist(post))
      loss = tf.reduce_mean(tf.maximum(value, free))
    else:
      def sg(x):
        return tf.nest.map_structure(tf.stop_gradient, x)
      value = kld(dist(prior), dist(sg(post)))
      pri = tf.reduce_mean(value)
      pos = tf.reduce_mean(kld(dist(sg(prior)), dist(post)))
      pri, pos = tf.maximum(pri, free), tf.maximum(pos, free)
      loss = balance * pri + (1 - balance) * pos
    loss *= scale
    return loss, value


class ConvEncoder(tools.Module):

  def __init__(
      self, depth=32, act=tf.nn.relu, kernels=(4, 4, 4, 4)):
    self._act = act
    self._depth = depth
    self._kernels = kernels

  def __call__(self, obs):
    kwargs = dict(strides=2, activation=self._act)
    Conv = tfkl.Conv2D
    x = tf.reshape(obs['image'], (-1,) + tuple(obs['image'].shape[-3:]))
    x = self.get('h1', Conv, 1 * self._depth, self._kernels[0], **kwargs)(x)
    x = self.get('h2', Conv, 2 * self._depth, self._kernels[1], **kwargs)(x)
    x = self.get('h3', Conv, 4 * self._depth, self._kernels[2], **kwargs)(x)
    x = self.get('h4', Conv, 8 * self._depth, self._kernels[3], **kwargs)(x)
    x = tf.reshape(x, [x.shape[0], np.prod(x.shape[1:])])
    shape = tf.concat([tf.shape(obs['image'])[:-3], [x.shape[-1]]], 0)
    return tf.reshape(x, shape)


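# Shape flow, as a worked example assuming the default 64x64x3 inputs and
# depth=32 (these numbers are an illustration, not stated in the sources):
# four stride-2 valid convolutions take 64 -> 31 -> 14 -> 6 -> 2, giving a
# 2x2x256 tensor that flattens to a 1024-dim embedding; the decoders below
# start from a Dense layer of the matching 32*depth size.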
class ConvDecoder(tools.Module):

  def __init__(
      self, depth=32, act=tf.nn.relu, shape=(64, 64, 3), kernels=(5, 5, 6, 6),
      thin=True):
    self._act = act
    self._depth = depth
    self._shape = shape
    self._kernels = kernels
    self._thin = thin

  def __call__(self, features, dtype=None):
    kwargs = dict(strides=2, activation=self._act)
    ConvT = tfkl.Conv2DTranspose
    if self._thin:
      x = self.get('h1', tfkl.Dense, 32 * self._depth, None)(features)
      x = tf.reshape(x, [-1, 1, 1, 32 * self._depth])
    else:
      x = self.get('h1', tfkl.Dense, 128 * self._depth, None)(features)
      x = tf.reshape(x, [-1, 2, 2, 32 * self._depth])
    x = self.get('h2', ConvT, 4 * self._depth,
                 self._kernels[0], **kwargs)(x)
    x = self.get('h3', ConvT, 2 * self._depth,
                 self._kernels[1], **kwargs)(x)
    x = self.get('h4', ConvT, 1 * self._depth,
                 self._kernels[2], **kwargs)(x)
    x = self.get(
        'h5', ConvT, self._shape[-1], self._kernels[3], strides=2)(x)
    mean = tf.reshape(x, tf.concat(
        [tf.shape(features)[:-1], self._shape], 0))
    if dtype:
      mean = tf.cast(mean, dtype)
    return tfd.Independent(tfd.Normal(mean, 1), len(self._shape))


class ConvDecoderMask(tools.Module):

  def __init__(
      self, depth=32, act=tf.nn.relu, shape=(64, 64, 3), kernels=(5, 5, 6, 6),
      thin=True):
    self._act = act
    self._depth = depth
    self._shape = shape
    self._kernels = kernels
    self._thin = thin

  def __call__(self, features, dtype=None):
    kwargs = dict(strides=2, activation=self._act)
    ConvT = tfkl.Conv2DTranspose
    if self._thin:
      x = self.get('h1', tfkl.Dense, 32 * self._depth, None)(features)
      x = tf.reshape(x, [-1, 1, 1, 32 * self._depth])
    else:
      x = self.get('h1', tfkl.Dense, 128 * self._depth, None)(features)
      x = tf.reshape(x, [-1, 2, 2, 32 * self._depth])
    x = self.get('h2', ConvT, 4 * self._depth,
                 self._kernels[0], **kwargs)(x)
    x = self.get('h3', ConvT, 2 * self._depth,
                 self._kernels[1], **kwargs)(x)
    x = self.get('h4', ConvT, 1 * self._depth,
                 self._kernels[2], **kwargs)(x)
    # Twice the output channels: one set for the mean, one for the mask.
    x = self.get(
        'h5', ConvT, 2 * self._shape[-1], self._kernels[3], strides=2)(x)
    mean, mask = tf.split(x, [self._shape[-1], self._shape[-1]], -1)
    mean = tf.reshape(mean, tf.concat(
        [tf.shape(features)[:-1], self._shape], 0))
    mask = tf.reshape(mask, tf.concat(
        [tf.shape(features)[:-1], self._shape], 0))
    if dtype:
      mean = tf.cast(mean, dtype)
      mask = tf.cast(mask, dtype)
    return tfd.Independent(tfd.Normal(mean, 1), len(self._shape)), mask


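# Blend rule implemented by the ensemble below (restated from the code):
# with m = sigmoid(Conv1x1([mask1, mask2])), the joint mean is
#   mean = m * mean1 + (1 - m) * mean2
# so decoder1 (the task branch) paints pixels where m is close to 1 and
# decoder2 (the distractor branch) paints the rest.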
class ConvDecoderMaskEnsemble(tools.Module):
  """
  Ensemble of two ConvDecoderMask decoders with (Normal, mask) outputs.
  NOTE: remove pred1/pred2 from the return value for maximum performance.
  """

  def __init__(self, decoder1, decoder2):
    self._decoder1 = decoder1
    self._decoder2 = decoder2
    self._shape = decoder1._shape

  def __call__(self, feat1, feat2, dtype=None):
    kwargs = dict(strides=1, activation=tf.nn.sigmoid)
    pred1, mask1 = self._decoder1(feat1, dtype)
    pred2, mask2 = self._decoder2(feat2, dtype)
    mean1 = pred1.submodules[0].loc
    mean2 = pred2.submodules[0].loc
    mask_feat = tf.concat([mask1, mask2], -1)
    mask = self.get('mask1', tfkl.Conv2D, 1, 1, **kwargs)(mask_feat)
    mask_use1 = mask
    mask_use2 = 1 - mask
    mean = mean1 * tf.cast(mask_use1, mean1.dtype) + \
        mean2 * tf.cast(mask_use2, mean2.dtype)
    return (tfd.Independent(tfd.Normal(mean, 1), len(self._shape)),
            pred1, pred2, tf.cast(mask_use1, mean1.dtype))


class DenseHead(tools.Module):

  def __init__(
      self, shape, layers, units, act=tf.nn.elu, dist='normal', std=1.0):
    self._shape = (shape,) if isinstance(shape, int) else shape
    self._layers = layers
    self._units = units
    self._act = act
    self._dist = dist
    self._std = std

  def __call__(self, features, dtype=None):
    x = features
    for index in range(self._layers):
      x = self.get(f'h{index}', tfkl.Dense, self._units, self._act)(x)
    mean = self.get('hmean', tfkl.Dense, np.prod(self._shape))(x)
    mean = tf.reshape(mean, tf.concat(
        [tf.shape(features)[:-1], self._shape], 0))
    if self._std == 'learned':
      std = self.get('hstd', tfkl.Dense, np.prod(self._shape))(x)
      std = tf.nn.softplus(std) + 0.01
      std = tf.reshape(std, tf.concat(
          [tf.shape(features)[:-1], self._shape], 0))
    else:
      std = self._std
    if dtype:
      mean, std = tf.cast(mean, dtype), tf.cast(std, dtype)
    if self._dist == 'normal':
      return tfd.Independent(tfd.Normal(mean, std), len(self._shape))
    if self._dist == 'huber':
      return tfd.Independent(
          tools.UnnormalizedHuber(mean, std, 1.0), len(self._shape))
    if self._dist == 'binary':
      return tfd.Independent(tfd.Bernoulli(mean), len(self._shape))
    raise NotImplementedError(self._dist)


class ActionHead(tools.Module):

  def __init__(
      self, size, layers, units, act=tf.nn.elu, dist='trunc_normal',
      init_std=0.0, min_std=0.1, action_disc=5, temp=0.1, outscale=0):
    # assert min_std <= 2
    self._size = size
    self._layers = layers
    self._units = units
    self._dist = dist
    self._act = act
    self._min_std = min_std
    self._init_std = init_std
    self._action_disc = action_disc
    self._temp = temp() if callable(temp) else temp
    self._outscale = outscale

  def __call__(self, features, dtype=None):
    x = features
    for index in range(self._layers):
      kw = {}
      if index == self._layers - 1 and self._outscale:
        kw['kernel_initializer'] = tf.keras.initializers.VarianceScaling(
            self._outscale)
      x = self.get(f'h{index}', tfkl.Dense, self._units, self._act, **kw)(x)
    if self._dist == 'tanh_normal':
      # https://www.desmos.com/calculator/rcmcf5jwe7
      x = self.get('hout', tfkl.Dense, 2 * self._size)(x)
      if dtype:
        x = tf.cast(x, dtype)
      mean, std = tf.split(x, 2, -1)
      mean = tf.tanh(mean)
      std = tf.nn.softplus(std + self._init_std) + self._min_std
      dist = tfd.Normal(mean, std)
      dist = tfd.TransformedDistribution(dist, tools.TanhBijector())
      dist = tfd.Independent(dist, 1)
      dist = tools.SampleDist(dist)
    elif self._dist == 'tanh_normal_5':
      x = self.get('hout', tfkl.Dense, 2 * self._size)(x)
      if dtype:
        x = tf.cast(x, dtype)
      mean, std = tf.split(x, 2, -1)
      mean = 5 * tf.tanh(mean / 5)
      std = tf.nn.softplus(std + 5) + 5
      dist = tfd.Normal(mean, std)
      dist = tfd.TransformedDistribution(dist, tools.TanhBijector())
      dist = tfd.Independent(dist, 1)
      dist = tools.SampleDist(dist)
    elif self._dist == 'normal':
      x = self.get('hout', tfkl.Dense, 2 * self._size)(x)
      if dtype:
        x = tf.cast(x, dtype)
      mean, std = tf.split(x, 2, -1)
      std = tf.nn.softplus(std + self._init_std) + self._min_std
      dist = tfd.Normal(mean, std)
      dist = tfd.Independent(dist, 1)
    elif self._dist == 'normal_1':
      mean = self.get('hout', tfkl.Dense, self._size)(x)
      if dtype:
        mean = tf.cast(mean, dtype)
      dist = tfd.Normal(mean, 1)
      dist = tfd.Independent(dist, 1)
    elif self._dist == 'trunc_normal':
      # https://www.desmos.com/calculator/mmuvuhnyxo
      x = self.get('hout', tfkl.Dense, 2 * self._size)(x)
      x = tf.cast(x, tf.float32)
      mean, std = tf.split(x, 2, -1)
      mean = tf.tanh(mean)
      std = 2 * tf.nn.sigmoid(std / 2) + self._min_std
      dist = tools.SafeTruncatedNormal(mean, std, -1, 1)
      dist = tools.DtypeDist(dist, dtype)
      dist = tfd.Independent(dist, 1)
    elif self._dist == 'onehot':
      x = self.get('hout', tfkl.Dense, self._size)(x)
      x = tf.cast(x, tf.float32)
      dist = tools.OneHotDist(x, dtype=dtype)
      dist = tools.DtypeDist(dist, dtype)
    elif self._dist == 'onehot_gumble':
      x = self.get('hout', tfkl.Dense, self._size)(x)
      if dtype:
        x = tf.cast(x, dtype)
      temp = self._temp
      dist = tools.GumbleDist(temp, x, dtype=dtype)
    else:
      raise NotImplementedError(self._dist)
    return dist


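# The GRUCell variant below computes reset/candidate/update gates from a
# single Dense layer, optionally LayerNorm'd in float32 for mixed-precision
# stability; update_bias=-1 biases the update gate toward keeping the
# previous state early in training.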
class GRUCell(tf.keras.layers.AbstractRNNCell):

  def __init__(self, size, norm=False, act=tf.tanh, update_bias=-1, **kwargs):
    super().__init__()
    self._size = size
    self._act = act
    self._norm = norm
    self._update_bias = update_bias
    self._layer = tfkl.Dense(3 * size, use_bias=norm is not None, **kwargs)
    if norm:
      self._norm = tfkl.LayerNormalization(dtype=tf.float32)

  @property
  def state_size(self):
    return self._size

  def call(self, inputs, state):
    state = state[0]  # Keras wraps the state in a list.
    parts = self._layer(tf.concat([inputs, state], -1))
    if self._norm:
      dtype = parts.dtype
      parts = tf.cast(parts, tf.float32)
      parts = self._norm(parts)
      parts = tf.cast(parts, dtype)
    reset, cand, update = tf.split(parts, 3, -1)
    reset = tf.nn.sigmoid(reset)
    cand = self._act(reset * cand)
    update = tf.nn.sigmoid(update + self._update_bias)
    output = update * cand + (1 - update) * state
    return output, [output]
694
DreamerV2/tools.py
Normal file
@ -0,0 +1,694 @@
import datetime
import io
import json
import pathlib
import pickle
import re
import time
import uuid

import numpy as np
import tensorflow as tf
import tensorflow.compat.v1 as tf1
import tensorflow_probability as tfp
from tensorflow.keras.mixed_precision import experimental as prec
from tensorflow_probability import distributions as tfd


# Patch to ignore seed to avoid synchronization across GPUs.
_orig_random_categorical = tf.random.categorical
def random_categorical(*args, **kwargs):
  kwargs['seed'] = None
  return _orig_random_categorical(*args, **kwargs)
tf.random.categorical = random_categorical

# Patch to ignore seed to avoid synchronization across GPUs.
_orig_random_normal = tf.random.normal
def random_normal(*args, **kwargs):
  kwargs['seed'] = None
  return _orig_random_normal(*args, **kwargs)
tf.random.normal = random_normal


class AttrDict(dict):

  __setattr__ = dict.__setitem__
  __getattr__ = dict.__getitem__


class Module(tf.Module):

  def save(self, filename):
    values = tf.nest.map_structure(lambda x: x.numpy(), self.variables)
    amount = len(tf.nest.flatten(values))
    count = int(sum(np.prod(x.shape) for x in tf.nest.flatten(values)))
    print(f'Save checkpoint with {amount} tensors and {count} parameters.')
    with pathlib.Path(filename).open('wb') as f:
      pickle.dump(values, f)

  def load(self, filename):
    with pathlib.Path(filename).open('rb') as f:
      values = pickle.load(f)
    amount = len(tf.nest.flatten(values))
    count = int(sum(np.prod(x.shape) for x in tf.nest.flatten(values)))
    print(f'Load checkpoint with {amount} tensors and {count} parameters.')
    tf.nest.map_structure(lambda x, y: x.assign(y), self.variables, values)

  def get(self, name, ctor, *args, **kwargs):
    # Create or get layer by name to avoid mentioning it in the constructor.
    if not hasattr(self, '_modules'):
      self._modules = {}
    if name not in self._modules:
      self._modules[name] = ctor(*args, **kwargs)
    return self._modules[name]


def var_nest_names(nest):
  if isinstance(nest, dict):
    items = ' '.join(f'{k}:{var_nest_names(v)}' for k, v in nest.items())
    return '{' + items + '}'
  if isinstance(nest, (list, tuple)):
    items = ' '.join(var_nest_names(v) for v in nest)
    return '[' + items + ']'
  if hasattr(nest, 'name') and hasattr(nest, 'shape'):
    return nest.name + str(nest.shape).replace(', ', 'x')
  if hasattr(nest, 'shape'):
    return str(nest.shape).replace(', ', 'x')
  return '?'


class Logger:

  def __init__(self, logdir, step):
    self._logdir = logdir
    self._writer = tf.summary.create_file_writer(str(logdir), max_queue=1000)
    self._last_step = None
    self._last_time = None
    self._scalars = {}
    self._images = {}
    self._videos = {}
    self.step = step

  def scalar(self, name, value):
    self._scalars[name] = float(value)

  def image(self, name, value):
    self._images[name] = np.array(value)

  def video(self, name, value):
    self._videos[name] = np.array(value)

  def write(self, fps=False):
    scalars = list(self._scalars.items())
    if fps:
      scalars.append(('fps', self._compute_fps(self.step)))
    print(f'[{self.step}]', ' / '.join(f'{k} {v:.1f}' for k, v in scalars))
    with (self._logdir / 'metrics.jsonl').open('a') as f:
      f.write(json.dumps({'step': self.step, **dict(scalars)}) + '\n')
    with self._writer.as_default():
      for name, value in scalars:
        tf.summary.scalar('scalars/' + name, value, self.step)
      for name, value in self._images.items():
        tf.summary.image(name, value, self.step)
      for name, value in self._videos.items():
        video_summary(name, value, self.step)
    self._writer.flush()
    self._scalars = {}
    self._images = {}
    self._videos = {}

  def _compute_fps(self, step):
    if self._last_step is None:
      self._last_time = time.time()
      self._last_step = step
      return 0
    steps = step - self._last_step
    duration = time.time() - self._last_time
    self._last_time += duration
    self._last_step = step
    return steps / duration


def graph_summary(writer, step, fn, *args):
  def inner(*args):
    tf.summary.experimental.set_step(step.numpy().item())
    with writer.as_default():
      fn(*args)
  return tf.numpy_function(inner, args, [])


def video_summary(name, video, step=None, fps=20):
  name = name if isinstance(name, str) else name.decode('utf-8')
  if np.issubdtype(video.dtype, np.floating):
    video = np.clip(255 * video, 0, 255).astype(np.uint8)
  B, T, H, W, C = video.shape
  try:
    frames = video.transpose((1, 2, 0, 3, 4)).reshape((T, H, B * W, C))
    summary = tf1.Summary()
    image = tf1.Summary.Image(height=B * H, width=T * W, colorspace=C)
    image.encoded_image_string = encode_gif(frames, fps)
    summary.value.add(tag=name, image=image)
    tf.summary.experimental.write_raw_pb(summary.SerializeToString(), step)
  except (IOError, OSError) as e:
    print('GIF summaries require ffmpeg in $PATH.', e)
    frames = video.transpose((0, 2, 1, 3, 4)).reshape((1, B * H, T * W, C))
    tf.summary.image(name, frames, step)


def encode_gif(frames, fps):
  from subprocess import Popen, PIPE
  h, w, c = frames[0].shape
  pxfmt = {1: 'gray', 3: 'rgb24'}[c]
  cmd = ' '.join([
      'ffmpeg -y -f rawvideo -vcodec rawvideo',
      f'-r {fps:.02f} -s {w}x{h} -pix_fmt {pxfmt} -i - -filter_complex',
      '[0:v]split[x][z];[z]palettegen[y];[x]fifo[x];[x][y]paletteuse',
      f'-r {fps:.02f} -f gif -'])
  proc = Popen(cmd.split(' '), stdin=PIPE, stdout=PIPE, stderr=PIPE)
  for image in frames:
    # tobytes(): ndarray.tostring() is a deprecated alias.
    proc.stdin.write(image.tobytes())
  out, err = proc.communicate()
  if proc.returncode:
    # `cmd` is already a single string, so it is included directly.
    raise IOError('\n'.join([cmd, err.decode('utf8')]))
  del proc
  return out


def simulate(agent, envs, steps=0, episodes=0, state=None):
  # Initialize or unpack simulation state.
  if state is None:
    step, episode = 0, 0
    done = np.ones(len(envs), bool)  # np.bool is a deprecated alias.
    length = np.zeros(len(envs), np.int32)
    obs = [None] * len(envs)
    agent_state = None
  else:
    step, episode, done, length, obs, agent_state = state
  while (steps and step < steps) or (episodes and episode < episodes):
    # Reset envs if necessary.
    if done.any():
      indices = [index for index, d in enumerate(done) if d]
      results = [envs[i].reset() for i in indices]
      for index, result in zip(indices, results):
        obs[index] = result
    # Step agents.
    obs = {k: np.stack([o[k] for o in obs]) for k in obs[0]}
    action, agent_state = agent(obs, done, agent_state)
    if isinstance(action, dict):
      action = [
          {k: np.array(action[k][i]) for k in action}
          for i in range(len(envs))]
    else:
      action = np.array(action)
    assert len(action) == len(envs)
    # Step envs.
    results = [e.step(a) for e, a in zip(envs, action)]
    obs, _, done = zip(*[p[:3] for p in results])
    obs = list(obs)
    done = np.stack(done)
    episode += int(done.sum())
    length += 1
    step += (done * length).sum()
    length *= (1 - done)
  # Return new state to allow resuming the simulation.
  return (step - steps, episode - episodes, done, length, obs, agent_state)


def save_episodes(directory, episodes):
  directory = pathlib.Path(directory).expanduser()
  directory.mkdir(parents=True, exist_ok=True)
  timestamp = datetime.datetime.now().strftime('%Y%m%dT%H%M%S')
  filenames = []
  for episode in episodes:
    identifier = str(uuid.uuid4().hex)
    length = len(episode['reward'])
    filename = directory / f'{timestamp}-{identifier}-{length}.npz'
    with io.BytesIO() as f1:
      np.savez_compressed(f1, **episode)
      f1.seek(0)
      with filename.open('wb') as f2:
        f2.write(f1.read())
    filenames.append(filename)
  return filenames


def sample_episodes(episodes, length=None, balance=False, seed=0):
  random = np.random.RandomState(seed)
  while True:
    episode = random.choice(list(episodes.values()))
    if length:
      total = len(next(iter(episode.values())))
      available = total - length
      if available < 1:
        # print(f'Skipped short episode of length {available}.')
        continue
      if balance:
        index = min(random.randint(0, total), available)
      else:
        index = int(random.randint(0, available + 1))
      episode = {k: v[index: index + length] for k, v in episode.items()}
    yield episode


def load_episodes(directory, limit=None):
  directory = pathlib.Path(directory).expanduser()
  episodes = {}
  total = 0
  for filename in reversed(sorted(directory.glob('*.npz'))):
    try:
      with filename.open('rb') as f:
        episode = np.load(f)
        episode = {k: episode[k] for k in episode.keys()}
    except Exception as e:
      print(f'Could not load episode: {e}')
      continue
    episodes[str(filename)] = episode
    total += len(episode['reward']) - 1
    if limit and total >= limit:
      break
  return episodes


class DtypeDist:

  def __init__(self, dist, dtype=None):
    self._dist = dist
    self._dtype = dtype or prec.global_policy().compute_dtype

  @property
  def name(self):
    return 'DtypeDist'

  def __getattr__(self, name):
    return getattr(self._dist, name)

  def mean(self):
    return tf.cast(self._dist.mean(), self._dtype)

  def mode(self):
    return tf.cast(self._dist.mode(), self._dtype)

  def entropy(self):
    return tf.cast(self._dist.entropy(), self._dtype)

  def sample(self, *args, **kwargs):
    return tf.cast(self._dist.sample(*args, **kwargs), self._dtype)


class SampleDist:

  def __init__(self, dist, samples=100):
    self._dist = dist
    self._samples = samples

  @property
  def name(self):
    return 'SampleDist'

  def __getattr__(self, name):
    return getattr(self._dist, name)

  def mean(self):
    samples = self._dist.sample(self._samples)
    return tf.reduce_mean(samples, 0)

  def mode(self):
    sample = self._dist.sample(self._samples)
    logprob = self._dist.log_prob(sample)
    return tf.gather(sample, tf.argmax(logprob))[0]

  def entropy(self):
    sample = self._dist.sample(self._samples)
    logprob = self.log_prob(sample)
    return -tf.reduce_mean(logprob, 0)


class OneHotDist(tfd.OneHotCategorical):

  def __init__(self, logits=None, probs=None, dtype=None):
    self._sample_dtype = dtype or prec.global_policy().compute_dtype
    super().__init__(logits=logits, probs=probs)

  def mode(self):
    return tf.cast(super().mode(), self._sample_dtype)

  def sample(self, sample_shape=(), seed=None):
    # Straight through biased gradient estimator.
    sample = tf.cast(super().sample(sample_shape, seed), self._sample_dtype)
    probs = super().probs_parameter()
    while len(probs.shape) < len(sample.shape):
      probs = probs[None]
    sample += tf.cast(probs - tf.stop_gradient(probs), self._sample_dtype)
    return sample


class GumbleDist(tfd.RelaxedOneHotCategorical):

  def __init__(self, temp, logits=None, probs=None, dtype=None):
    self._sample_dtype = dtype or prec.global_policy().compute_dtype
    self._exact = tfd.OneHotCategorical(logits=logits, probs=probs)
    super().__init__(temp, logits=logits, probs=probs)

  def mode(self):
    return tf.cast(self._exact.mode(), self._sample_dtype)

  def entropy(self):
    return tf.cast(self._exact.entropy(), self._sample_dtype)

  def sample(self, sample_shape=(), seed=None):
    return tf.cast(super().sample(sample_shape, seed), self._sample_dtype)


class UnnormalizedHuber(tfd.Normal):

  def __init__(self, loc, scale, threshold=1, **kwargs):
    self._threshold = tf.cast(threshold, loc.dtype)
    super().__init__(loc, scale, **kwargs)

  def log_prob(self, event):
    return -(tf.math.sqrt(
        (event - self.mean()) ** 2 + self._threshold ** 2) - self._threshold)


class SafeTruncatedNormal(tfd.TruncatedNormal):

  def __init__(self, loc, scale, low, high, clip=1e-6, mult=1):
    super().__init__(loc, scale, low, high)
    self._clip = clip
    self._mult = mult

  def sample(self, *args, **kwargs):
    event = super().sample(*args, **kwargs)
    if self._clip:
      clipped = tf.clip_by_value(
          event, self.low + self._clip, self.high - self._clip)
      event = event - tf.stop_gradient(event) + tf.stop_gradient(clipped)
    if self._mult:
      event *= self._mult
    return event


class TanhBijector(tfp.bijectors.Bijector):

  def __init__(self, validate_args=False, name='tanh'):
    super().__init__(
        forward_min_event_ndims=0,
        validate_args=validate_args,
        name=name)

  def _forward(self, x):
    return tf.nn.tanh(x)

  def _inverse(self, y):
    dtype = y.dtype
    y = tf.cast(y, tf.float32)
    y = tf.where(
        tf.less_equal(tf.abs(y), 1.),
        tf.clip_by_value(y, -0.99999997, 0.99999997), y)
    y = tf.atanh(y)
    y = tf.cast(y, dtype)
    return y

  def _forward_log_det_jacobian(self, x):
    log2 = tf.math.log(tf.constant(2.0, dtype=x.dtype))
    return 2.0 * (log2 - x - tf.nn.softplus(-2.0 * x))


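# Lambda-return recursion implemented below (a restatement of the standard
# formulation): with per-step discount p_t and value estimates v,
#   R_t = r_t + p_t * ((1 - lambda) * v(s_{t+1}) + lambda * R_{t+1})
# lambda=1 recovers the discounted Monte Carlo return and lambda=0 the
# 1-step return, matching the comments inside the function.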
def lambda_return(
    reward, value, pcont, bootstrap, lambda_, axis):
  # Setting lambda=1 gives a discounted Monte Carlo return.
  # Setting lambda=0 gives a fixed 1-step return.
  assert reward.shape.ndims == value.shape.ndims, (reward.shape, value.shape)
  if isinstance(pcont, (int, float)):
    pcont = pcont * tf.ones_like(reward)
  dims = list(range(reward.shape.ndims))
  dims = [axis] + dims[1:axis] + [0] + dims[axis + 1:]
  if axis != 0:
    reward = tf.transpose(reward, dims)
    value = tf.transpose(value, dims)
    pcont = tf.transpose(pcont, dims)
  if bootstrap is None:
    bootstrap = tf.zeros_like(value[-1])
  next_values = tf.concat([value[1:], bootstrap[None]], 0)
  inputs = reward + pcont * next_values * (1 - lambda_)
  returns = static_scan(
      lambda agg, cur: cur[0] + cur[1] * lambda_ * agg,
      (inputs, pcont), bootstrap, reverse=True)
  if axis != 0:
    returns = tf.transpose(returns, dims)
  return returns


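# The Optimizer wrapper below bundles gradient clipping, optional decoupled
# weight decay, and float16 loss scaling: under mixed precision the loss is
# scaled before tape.gradient and the gradients are unscaled afterwards, so
# small gradients survive the float16 round-trip.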
class Optimizer(tf.Module):

  def __init__(
      self, name, lr, eps=1e-4, clip=None, wd=None, wd_pattern=r'.*',
      opt='adam'):
    assert 0 <= wd < 1
    assert not clip or 1 <= clip
    self._name = name
    self._clip = clip
    self._wd = wd
    self._wd_pattern = wd_pattern
    self._opt = {
        'adam': lambda: tf.optimizers.Adam(lr, epsilon=eps),
        'nadam': lambda: tf.optimizers.Nadam(lr, epsilon=eps),
        'adamax': lambda: tf.optimizers.Adamax(lr, epsilon=eps),
        'sgd': lambda: tf.optimizers.SGD(lr),
        'momentum': lambda: tf.optimizers.SGD(lr, 0.9),
    }[opt]()
    self._mixed = (prec.global_policy().compute_dtype == tf.float16)
    if self._mixed:
      self._opt = prec.LossScaleOptimizer(self._opt, 'dynamic')

  @property
  def variables(self):
    return self._opt.variables()

  def __call__(self, tape, loss, modules, prefix=None):
    assert loss.dtype is tf.float32, self._name
    modules = modules if hasattr(modules, '__len__') else (modules,)
    varibs = tf.nest.flatten([module.variables for module in modules])
    count = sum(np.prod(x.shape) for x in varibs)
    print(f'Found {count} {self._name} parameters.')
    assert len(loss.shape) == 0, loss.shape
    tf.debugging.check_numerics(loss, self._name + '_loss')
    if self._mixed:
      with tape:
        loss = self._opt.get_scaled_loss(loss)
    grads = tape.gradient(loss, varibs)
    if self._mixed:
      grads = self._opt.get_unscaled_gradients(grads)
    norm = tf.linalg.global_norm(grads)
    if not self._mixed:
      tf.debugging.check_numerics(norm, self._name + '_norm')
    if self._clip:
      grads, _ = tf.clip_by_global_norm(grads, self._clip, norm)
    if self._wd:
      self._apply_weight_decay(varibs)
    self._opt.apply_gradients(zip(grads, varibs))
    metrics = {}
    if prefix:
      metrics[f'{prefix}/{self._name}_loss'] = loss
      metrics[f'{prefix}/{self._name}_grad_norm'] = norm
      if self._mixed:
        metrics[f'{prefix}/{self._name}_loss_scale'] = \
            self._opt.loss_scale._current_loss_scale
    else:
      metrics[f'{self._name}_loss'] = loss
      metrics[f'{self._name}_grad_norm'] = norm
      if self._mixed:
        metrics[f'{self._name}_loss_scale'] = \
            self._opt.loss_scale._current_loss_scale
    return metrics

  def _apply_weight_decay(self, varibs):
    nontrivial = (self._wd_pattern != r'.*')
    if nontrivial:
      print('Applied weight decay to variables:')
    for var in varibs:
      if re.search(self._wd_pattern, self._name + '/' + var.name):
        if nontrivial:
          print('- ' + self._name + '/' + var.name)
        var.assign((1 - self._wd) * var)


def args_type(default):
  def parse_string(x):
    if default is None:
      return x
    if isinstance(default, bool):
      return bool(['False', 'True'].index(x))
    if isinstance(default, int):
      return float(x) if ('e' in x or '.' in x) else int(x)
    if isinstance(default, (list, tuple)):
      return tuple(args_type(default[0])(y) for y in x.split(','))
    return type(default)(x)

  def parse_object(x):
    if isinstance(default, (list, tuple)):
      return tuple(x)
    return x
  return lambda x: parse_string(x) if isinstance(x, str) else parse_object(x)


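# static_scan is a Python-loop substitute for tf.scan that handles nested
# structures. Usage sketch (an illustration with made-up arguments):
#   outputs = static_scan(
#       lambda agg, cur: agg + cur, tf.range(5), tf.constant(0))
# returns the stacked running sums [0, 1, 3, 6, 10].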
def static_scan(fn, inputs, start, reverse=False):
|
||||||
|
last = start
|
||||||
|
outputs = [[] for _ in tf.nest.flatten(start)]
|
||||||
|
indices = range(len(tf.nest.flatten(inputs)[0]))
|
||||||
|
if reverse:
|
||||||
|
indices = reversed(indices)
|
||||||
|
for index in indices:
|
||||||
|
inp = tf.nest.map_structure(lambda x: x[index], inputs)
|
||||||
|
last = fn(last, inp)
|
||||||
|
[o.append(l) for o, l in zip(outputs, tf.nest.flatten(last))]
|
||||||
|
if reverse:
|
||||||
|
outputs = [list(reversed(x)) for x in outputs]
|
||||||
|
outputs = [tf.stack(x, 0) for x in outputs]
|
||||||
|
return tf.nest.pack_sequence_as(start, outputs)
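
# A minimal usage sketch (not from the original source): a cumulative sum,
# where `fn` maps (carry, input) -> new carry and the carries are stacked.
#
#   totals = static_scan(lambda last, x: last + x, tf.range(5), tf.constant(0))
#   # totals == [0, 1, 3, 6, 10]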


def uniform_mixture(dist, dtype=None):
  # Treats the last batch dimension as mixture components with equal weights.
  if dist.batch_shape[-1] == 1:
    return tfd.BatchReshape(dist, dist.batch_shape[:-1])
  dtype = dtype or prec.global_policy().compute_dtype
  weights = tfd.Categorical(tf.zeros(dist.batch_shape, dtype))
  return tfd.MixtureSameFamily(weights, dist)


def cat_mixture_entropy(dist):
  if isinstance(dist, tfd.MixtureSameFamily):
    probs = dist.components_distribution.probs_parameter()
  else:
    probs = dist.probs_parameter()
  # Entropy of the component-averaged categorical distribution.
  return -tf.reduce_mean(
      tf.reduce_mean(probs, 2) *
      tf.math.log(tf.reduce_mean(probs, 2) + 1e-8), -1)


@tf.function
def cem_planner(
    state, num_actions, horizon, proposals, topk, iterations, imagine,
    objective):
  # Cross-entropy method: sample action sequences, score them by imagined
  # rollouts, and iteratively refit a Gaussian to the top-k sequences.
  dtype = prec.global_policy().compute_dtype
  B, P = list(state.values())[0].shape[0], proposals
  H, A = horizon, num_actions
  flat_state = {k: tf.repeat(v, P, 0) for k, v in state.items()}
  mean = tf.zeros((B, H, A), dtype)
  std = tf.ones((B, H, A), dtype)
  for _ in range(iterations):
    proposals = tf.random.normal((B, P, H, A), dtype=dtype)
    proposals = proposals * std[:, None] + mean[:, None]
    proposals = tf.clip_by_value(proposals, -1, 1)
    flat_proposals = tf.reshape(proposals, (B * P, H, A))
    states = imagine(flat_proposals, flat_state)
    scores = objective(states)
    scores = tf.reshape(tf.reduce_sum(scores, -1), (B, P))
    _, indices = tf.math.top_k(scores, topk, sorted=False)
    best = tf.gather(proposals, indices, axis=1, batch_dims=1)
    mean, var = tf.nn.moments(best, 1)
    std = tf.sqrt(var + 1e-6)
  return mean[:, 0, :]
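
# A hedged usage sketch (the callables named here are assumptions, not from
# the source): `imagine(actions, state)` rolls out the learned model over the
# proposed action sequences and `objective(states)` scores each step, e.g.
# with predicted rewards. Only the first action of the refined mean is
# returned, in standard MPC fashion:
#
#   action = cem_planner(
#       state, num_actions=6, horizon=12, proposals=1000, topk=100,
#       iterations=10, imagine=world_model.imagine, objective=reward_fn)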


@tf.function
def grad_planner(
    state, num_actions, horizon, proposals, iterations, imagine, objective,
    kl_scale, step_size):
  # Gradient-based planner: ascends an ELBO-style objective with respect to
  # the mean and (raw) std of a Gaussian over action sequences.
  dtype = prec.global_policy().compute_dtype
  B, P = list(state.values())[0].shape[0], proposals
  H, A = horizon, num_actions
  flat_state = {k: tf.repeat(v, P, 0) for k, v in state.items()}
  mean = tf.zeros((B, H, A), dtype)
  rawstd = 0.54 * tf.ones((B, H, A), dtype)  # softplus(0.54) is roughly 1.
  for _ in range(iterations):
    proposals = tf.random.normal((B, P, H, A), dtype=dtype)
    with tf.GradientTape(watch_accessed_variables=False) as tape:
      tape.watch(mean)
      tape.watch(rawstd)
      std = tf.nn.softplus(rawstd)
      proposals = proposals * std[:, None] + mean[:, None]
      # Straight-through clipping: clip in the forward pass but let
      # gradients flow as if unclipped.
      proposals = (
          tf.stop_gradient(tf.clip_by_value(proposals, -1, 1)) +
          proposals - tf.stop_gradient(proposals))
      flat_proposals = tf.reshape(proposals, (B * P, H, A))
      states = imagine(flat_proposals, flat_state)
      scores = objective(states)
      scores = tf.reshape(tf.reduce_sum(scores, -1), (B, P))
      div = tfd.kl_divergence(
          tfd.Normal(mean, std),
          tfd.Normal(tf.zeros_like(mean), tf.ones_like(std)))
      elbo = tf.reduce_sum(scores) - kl_scale * div
      elbo /= tf.cast(tf.reduce_prod(tf.shape(scores)), dtype)
    grad_mean, grad_rawstd = tape.gradient(elbo, [mean, rawstd])
    # Normalize gradient magnitudes before taking a step.
    e, v = tf.nn.moments(grad_mean, [1, 2], keepdims=True)
    grad_mean /= tf.sqrt(e * e + v + 1e-4)
    e, v = tf.nn.moments(grad_rawstd, [1, 2], keepdims=True)
    grad_rawstd /= tf.sqrt(e * e + v + 1e-4)
    mean = tf.clip_by_value(mean + step_size * grad_mean, -1, 1)
    rawstd = rawstd + step_size * grad_rawstd
  return mean[:, 0, :]


class Every:

  def __init__(self, every):
    self._every = every
    self._last = None

  def __call__(self, step):
    if not self._every:
      return False
    if self._last is None:
      self._last = step
      return True
    if step >= self._last + self._every:
      self._last += self._every
      return True
    return False


class Once:

  def __init__(self):
    self._once = True

  def __call__(self):
    if self._once:
      self._once = False
      return True
    return False


class Until:

  def __init__(self, until):
    self._until = until

  def __call__(self, step):
    if not self._until:
      return True
    return step < self._until
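
# A minimal usage sketch (assumed, not from the source): these helpers gate
# periodic work in a training loop.
#
#   should_log = Every(1000)
#   should_pretrain = Once()
#   should_train = Until(int(1e6))
#   if should_train(step) and should_log(step):
#     ...  # runs every 1000 steps while step < 1e6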


def schedule(string, step):
  try:
    return float(string)
  except ValueError:
    step = tf.cast(step, tf.float32)
    match = re.match(r'linear\((.+),(.+),(.+)\)', string)
    if match:
      initial, final, duration = [float(group) for group in match.groups()]
      mix = tf.clip_by_value(step / duration, 0, 1)
      return (1 - mix) * initial + mix * final
    match = re.match(r'warmup\((.+),(.+)\)', string)
    if match:
      warmup, value = [float(group) for group in match.groups()]
      scale = tf.clip_by_value(step / warmup, 0, 1)
      return scale * value
    match = re.match(r'exp\((.+),(.+),(.+)\)', string)
    if match:
      initial, final, halflife = [float(group) for group in match.groups()]
      return (initial - final) * 0.5 ** (step / halflife) + final
    raise NotImplementedError(string)
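
# Supported schedule strings, derived from the parsers above:
#
#   schedule('0.3', step)                  # constant 0.3
#   schedule('linear(1.0,0.1,1e5)', step)  # 1.0 -> 0.1 over 1e5 steps
#   schedule('warmup(1e4,1.0)', step)      # ramp 0 -> 1.0 over 1e4 steps
#   schedule('exp(1.0,0.1,1e5)', step)     # decay to 0.1 with halflife 1e5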
280
DreamerV2/wrappers.py
Normal file
@ -0,0 +1,280 @@
import threading

import gym
import numpy as np


class DeepMindControl:

  def __init__(self, name, action_repeat=1, size=(64, 64), camera=None):
    domain, task = name.split('_', 1)
    if domain == 'cup':  # Only domain with multiple words.
      domain = 'ball_in_cup'
    if isinstance(domain, str):
      from dm_control import suite
      self._env = suite.load(domain, task)
    else:
      assert task is None
      self._env = domain()
    self._action_repeat = action_repeat
    self._size = size
    if camera is None:
      camera = dict(quadruped=2).get(domain, 0)
    self._camera = camera

  @property
  def observation_space(self):
    spaces = {}
    for key, value in self._env.observation_spec().items():
      spaces[key] = gym.spaces.Box(
          -np.inf, np.inf, value.shape, dtype=np.float32)
    spaces['image'] = gym.spaces.Box(
        0, 255, self._size + (3,), dtype=np.uint8)
    return gym.spaces.Dict(spaces)

  @property
  def action_space(self):
    spec = self._env.action_spec()
    return gym.spaces.Box(spec.minimum, spec.maximum, dtype=np.float32)

  def step(self, action):
    assert np.isfinite(action).all(), action
    reward = 0
    for _ in range(self._action_repeat):
      time_step = self._env.step(action)
      reward += time_step.reward or 0
      if time_step.last():
        break
    obs = dict(time_step.observation)
    obs['image'] = self.render()
    done = time_step.last()
    info = {'discount': np.array(time_step.discount, np.float32)}
    return obs, reward, done, info

  def reset(self):
    time_step = self._env.reset()
    obs = dict(time_step.observation)
    obs['image'] = self.render()
    return obs

  def render(self, *args, **kwargs):
    if kwargs.get('mode', 'rgb_array') != 'rgb_array':
      raise ValueError("Only render mode 'rgb_array' is supported.")
    return self._env.physics.render(*self._size, camera_id=self._camera)


class Atari:

  LOCK = threading.Lock()

  def __init__(
      self, name, action_repeat=4, size=(84, 84), grayscale=True, noops=30,
      life_done=False, sticky_actions=True, all_actions=False):
    assert size[0] == size[1]
    import gym.wrappers
    import gym.envs.atari
    with self.LOCK:
      env = gym.envs.atari.AtariEnv(
          game=name, obs_type='image', frameskip=1,
          repeat_action_probability=0.25 if sticky_actions else 0.0,
          full_action_space=all_actions)
    # Avoid unnecessary rendering in inner env.
    env._get_obs = lambda: None
    # Tell wrapper that the inner env has no action repeat.
    env.spec = gym.envs.registration.EnvSpec('NoFrameskip-v0')
    env = gym.wrappers.AtariPreprocessing(
        env, noops, action_repeat, size[0], life_done, grayscale)
    self._env = env
    self._grayscale = grayscale

  @property
  def observation_space(self):
    return gym.spaces.Dict({
        'image': self._env.observation_space,
        'ram': gym.spaces.Box(0, 255, (128,), np.uint8),
    })

  @property
  def action_space(self):
    return self._env.action_space

  def close(self):
    return self._env.close()

  def reset(self):
    with self.LOCK:
      image = self._env.reset()
    if self._grayscale:
      image = image[..., None]
    obs = {'image': image, 'ram': self._env.env._get_ram()}
    return obs

  def step(self, action):
    image, reward, done, info = self._env.step(action)
    if self._grayscale:
      image = image[..., None]
    obs = {'image': image, 'ram': self._env.env._get_ram()}
    return obs, reward, done, info

  def render(self, mode):
    return self._env.render(mode)


class CollectDataset:
  # Buffers transitions and assembles the complete episode when done,
  # passing it to the registered callbacks.

  def __init__(self, env, callbacks=None, precision=32):
    self._env = env
    self._callbacks = callbacks or ()
    self._precision = precision
    self._episode = None

  def __getattr__(self, name):
    return getattr(self._env, name)

  def step(self, action):
    obs, reward, done, info = self._env.step(action)
    obs = {k: self._convert(v) for k, v in obs.items()}
    transition = obs.copy()
    transition['action'] = action
    transition['reward'] = reward
    transition['discount'] = info.get('discount', np.array(1 - float(done)))
    self._episode.append(transition)
    if done:
      episode = {k: [t[k] for t in self._episode] for k in self._episode[0]}
      episode = {k: self._convert(v) for k, v in episode.items()}
      info['episode'] = episode
      for callback in self._callbacks:
        callback(episode)
    return obs, reward, done, info

  def reset(self):
    obs = self._env.reset()
    transition = obs.copy()
    transition['action'] = np.zeros(self._env.action_space.shape)
    transition['reward'] = 0.0
    transition['discount'] = 1.0
    self._episode = [transition]
    return obs

  def _convert(self, value):
    value = np.array(value)
    if np.issubdtype(value.dtype, np.floating):
      dtype = {16: np.float16, 32: np.float32, 64: np.float64}[self._precision]
    elif np.issubdtype(value.dtype, np.signedinteger):
      dtype = {16: np.int16, 32: np.int32, 64: np.int64}[self._precision]
    elif np.issubdtype(value.dtype, np.uint8):
      dtype = np.uint8
    else:
      raise NotImplementedError(value.dtype)
    return value.astype(dtype)


class TimeLimit:

  def __init__(self, env, duration):
    self._env = env
    self._duration = duration
    self._step = None

  def __getattr__(self, name):
    return getattr(self._env, name)

  def step(self, action):
    assert self._step is not None, 'Must reset environment.'
    obs, reward, done, info = self._env.step(action)
    self._step += 1
    if self._step >= self._duration:
      done = True
      if 'discount' not in info:
        # A discount of 1 marks a time limit rather than a terminal state.
        info['discount'] = np.array(1.0).astype(np.float32)
      self._step = None
    return obs, reward, done, info

  def reset(self):
    self._step = 0
    return self._env.reset()


class NormalizeActions:
  # Rescales actions from [-1, 1] to the bounded dimensions of the wrapped
  # environment's action space.

  def __init__(self, env):
    self._env = env
    self._mask = np.logical_and(
        np.isfinite(env.action_space.low),
        np.isfinite(env.action_space.high))
    self._low = np.where(self._mask, env.action_space.low, -1)
    self._high = np.where(self._mask, env.action_space.high, 1)

  def __getattr__(self, name):
    return getattr(self._env, name)

  @property
  def action_space(self):
    low = np.where(self._mask, -np.ones_like(self._low), self._low)
    high = np.where(self._mask, np.ones_like(self._low), self._high)
    return gym.spaces.Box(low, high, dtype=np.float32)

  def step(self, action):
    original = (action + 1) / 2 * (self._high - self._low) + self._low
    original = np.where(self._mask, original, action)
    return self._env.step(original)


class OneHotAction:
  # Exposes a discrete action space as one-hot vectors.

  def __init__(self, env):
    assert isinstance(env.action_space, gym.spaces.Discrete)
    self._env = env
    self._random = np.random.RandomState()

  def __getattr__(self, name):
    return getattr(self._env, name)

  @property
  def action_space(self):
    shape = (self._env.action_space.n,)
    space = gym.spaces.Box(low=0, high=1, shape=shape, dtype=np.float32)
    space.sample = self._sample_action
    return space

  def step(self, action):
    index = np.argmax(action).astype(int)
    reference = np.zeros_like(action)
    reference[index] = 1
    if not np.allclose(reference, action):
      raise ValueError(f'Invalid one-hot action:\n{action}')
    return self._env.step(index)

  def reset(self):
    return self._env.reset()

  def _sample_action(self):
    actions = self._env.action_space.n
    index = self._random.randint(0, actions)
    reference = np.zeros(actions, dtype=np.float32)
    reference[index] = 1.0
    return reference


class RewardObs:
  # Adds the previous reward to the observation dictionary.

  def __init__(self, env):
    self._env = env

  def __getattr__(self, name):
    return getattr(self._env, name)

  @property
  def observation_space(self):
    spaces = self._env.observation_space.spaces
    assert 'reward' not in spaces
    spaces['reward'] = gym.spaces.Box(-np.inf, np.inf, dtype=np.float32)
    return gym.spaces.Dict(spaces)

  def step(self, action):
    obs, reward, done, info = self._env.step(action)
    obs['reward'] = reward
    return obs, reward, done, info

  def reset(self):
    obs = self._env.reset()
    obs['reward'] = 0.0
    return obs
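
# A hedged composition sketch (not part of the original file): the training
# script presumably stacks these wrappers roughly as follows.
#
#   env = DeepMindControl('cheetah_run', action_repeat=2)
#   env = NormalizeActions(env)
#   env = TimeLimit(env, duration=500)
#   env = CollectDataset(env, callbacks=[save_episode])  # hypothetical callback
#   env = RewardObs(env)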
126
README.md
Normal file
@ -0,0 +1,126 @@
# Learning Task Informed Abstractions (TIA)

<sub>Left to right: Raw Observation, Dreamer, Joint of TIA, Task Stream of TIA, Distractor Stream of TIA</sub>

![](imgs/gt.gif) ![](imgs/pred.gif) ![](imgs/joint.gif) ![](imgs/main.gif) ![](imgs/disen.gif)

This code base contains a minimal modification over [Dreamer](https://danijar.com/project/dreamer/)/[DreamerV2](https://danijar.com/project/dreamerv2/) to learn disentangled world models, presented in:

**Learning Task Informed Abstractions**

Xiang Fu*, Ge Yang*, Pulkit Agrawal, Tommi Jaakkola

ICML 2021 [[website]](https://xiangfu.co/tia) [[paper]](https://arxiv.org/abs/2106.15612)

The directory [Dreamer](./Dreamer) contains code for running DMC experiments. The directory [DreamerV2](./DreamerV2) contains code for running Atari experiments. This implementation is tested with Python 3.6, TensorFlow 2.3.1, and CUDA 10.1. The training/evaluation metrics used for producing the figures in the paper can be downloaded from [this Google Drive link](https://drive.google.com/file/d/1wvSp9Q7r2Ah5xRE_x3nJy-uwLkjF2RgX/view?usp=sharing).

If you find this code useful, please consider citing:

```
@inproceedings{fu2021learning,
  title={Learning Task Informed Abstractions},
  author={Fu, Xiang and Yang, Ge and Agrawal, Pulkit and Jaakkola, Tommi},
  booktitle={ICML},
  year={2021}
}
```

## Getting started

Get the dependencies:

```sh
pip3 install --user tensorflow-gpu==2.3.1
pip3 install --user tensorflow_probability==0.11.0
pip3 install --user gym
pip3 install --user pandas
pip3 install --user matplotlib
pip3 install --user ruamel.yaml
pip3 install --user scikit-image
pip3 install --user git+git://github.com/deepmind/dm_control.git
pip3 install --user 'gym[atari]'
```

You will need an active MuJoCo license to run the DMC experiments.

## Running DMC experiments with distracting background

Code for running DMC experiments is under the directory [Dreamer](./Dreamer).

To run DMC experiments with distracting video backgrounds, you can download a small set of 16 videos (videos whose names start with "A" in the Kinetics 400 dataset's `driving_car` class) from [this Google Drive link](https://drive.google.com/file/d/1f-ER2XnhpvQeGjlJaoGRiLR0oEjn6Le_/view?usp=sharing), which is used for producing Figure 9(a) in the paper's appendix.

To replicate the setup of [DBC](https://github.com/facebookresearch/deep_bisim4control) and use more background videos, first download the Kinetics 400 dataset and grab the `driving_car` label from the train dataset, using the repo [https://github.com/Showmax/kinetics-downloader](https://github.com/Showmax/kinetics-downloader).

Train the agent:

```sh
python run.py --method dreamer --configs dmc --task dmc_cheetah_run_driving --logdir ~/logdir --video_dir VIDPATH
```

`VIDPATH` should contain `*.mp4` video files. (If you used the above repo to download the Kinetics videos, set `VIDPATH` to `PATH_TO_REPO/kinetics-downloader/dataset/train/driving_car`.)

Choose the method from:

```
[dreamer, tia, inverse]
```

corresponding to the original Dreamer, TIA, and the representation learned with an inverse model as described in Section 4.2 of the paper (see the example command after the background options below).

Choose environment + distraction (e.g. `dmc_cheetah_run_driving`):

```
dmc_{domain}_{task}_{distraction}
```

where `{domain}` (e.g., cheetah, walker, hopper) and `{task}` (e.g., run, walk, stand) are from the DeepMind Control Suite, and `{distraction}` can be chosen from:

```
[none, noise, driving]
```

where each option uses a different background:

```
none: default (no) background

noise: white noise background

driving: natural videos from the "driving car" class as background
```
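
For example, a hypothetical TIA run on Walker Walk with white-noise backgrounds (the flags mirror the documented command above; `--video_dir` should only be needed for the `driving` distraction):

```sh
python run.py --method tia --configs dmc --task dmc_walker_walk_noise --logdir ~/logdir
```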

## Running Atari experiments

Code for running Atari experiments is under the directory [DreamerV2](./DreamerV2).

Train the agent with the game Demon Attack:

```sh
python dreamer.py --logdir ~/logdir/atari_demon_attack/TIA/1 \
    --configs defaults atari --task atari_demon_attack
```

## Monitoring results

Both DMC and Atari experiments log with TensorBoard by default. The decomposition into the two TIA streams is visualized as `.gif` animations. Access TensorBoard with the command:

```sh
tensorboard --logdir LOGDIR
```

## Reference

We modify [Dreamer](https://github.com/danijar/dreamer) for DMC environments and [DreamerV2](https://github.com/danijar/dreamerv2) for Atari games. Thanks to Danijar for releasing his very clean implementations! Utilities such as

- logging with TensorBoard/JSON line files
- debugging with the `debug` flag
- mixed precision training

are the same as in the respective original implementations.
BIN
imgs/disen.gif
Normal file
Binary file not shown.
After Width: | Height: | Size: 872 KiB
BIN
imgs/gt.gif
Normal file
Binary file not shown.
After Width: | Height: | Size: 931 KiB
BIN
imgs/joint.gif
Normal file
Binary file not shown.
After Width: | Height: | Size: 925 KiB
BIN
imgs/main.gif
Normal file
Binary file not shown.
After Width: | Height: | Size: 899 KiB
BIN
imgs/pred.gif
Normal file
Binary file not shown.
After Width: | Height: | Size: 970 KiB