import multiprocessing
import os
import platform
import subprocess
import distutils.spawn
from functools import partial
from collections import deque

import gym
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
from baselines.common.tf_util import normc_initializer
from mpi4py import MPI

tfd = tfp.distributions
layers = tf.keras.layers


def bcast_tf_vars_from_root(sess, vars):
    """
    Send the root node's parameters to every worker.

    Arguments:
        sess: the TensorFlow session.
        vars: the variables to broadcast, including the optimizer's.
    """
    rank = MPI.COMM_WORLD.Get_rank()
    for var in vars:
        if rank == 0:
            MPI.COMM_WORLD.bcast(sess.run(var))
        else:
            sess.run(tf.assign(var, MPI.COMM_WORLD.bcast(None)))


def get_mean_and_std(array):
    """Compute the mean and standard deviation of `array` across all MPI workers."""
    comm = MPI.COMM_WORLD
    task_id, num_tasks = comm.Get_rank(), comm.Get_size()
    local_mean = np.array(np.mean(array))
    sum_of_means = np.zeros((), dtype=np.float32)
    comm.Allreduce(local_mean, sum_of_means, op=MPI.SUM)
    mean = sum_of_means / num_tasks

    n_array = array - mean
    sqs = n_array ** 2
    local_mean = np.array(np.mean(sqs))
    sum_of_means = np.zeros((), dtype=np.float32)
    comm.Allreduce(local_mean, sum_of_means, op=MPI.SUM)
    var = sum_of_means / num_tasks
    std = var ** 0.5
    return mean, std


def guess_available_gpus(n_gpus=None):
    if n_gpus is not None:
        return list(range(n_gpus))
    if 'CUDA_VISIBLE_DEVICES' in os.environ:
        cuda_visible_devices = os.environ['CUDA_VISIBLE_DEVICES'].split(',')
        return [int(n) for n in cuda_visible_devices]
    nvidia_dir = '/proc/driver/nvidia/gpus/'
    if os.path.exists(nvidia_dir):
        n_gpus = len(os.listdir(nvidia_dir))
        return list(range(n_gpus))
    raise Exception("Couldn't guess the available gpus on this machine")


def setup_mpi_gpus():
    """
    Set CUDA_VISIBLE_DEVICES using MPI.
    """
    available_gpus = guess_available_gpus()
    node_id = platform.node()
    nodes_ordered_by_rank = MPI.COMM_WORLD.allgather(node_id)
    processes_outranked_on_this_node = [n for n in nodes_ordered_by_rank[:MPI.COMM_WORLD.Get_rank()]
                                        if n == node_id]
    local_rank = len(processes_outranked_on_this_node)
    os.environ['CUDA_VISIBLE_DEVICES'] = str(available_gpus[local_rank])


def guess_available_cpus():
    return int(multiprocessing.cpu_count())


def setup_tensorflow_session():
    num_cpu = guess_available_cpus()
    tf_config = tf.ConfigProto(
        inter_op_parallelism_threads=num_cpu,
        intra_op_parallelism_threads=num_cpu
    )
    tf_config.gpu_options.allow_growth = True
    return tf.Session(config=tf_config)


def random_agent_ob_mean_std(env, nsteps=10000):
    """Estimate observation mean and std with a random policy on rank 0, then broadcast to all workers."""
    ob = np.asarray(env.reset())
    if MPI.COMM_WORLD.Get_rank() == 0:
        obs = [ob]
        for _ in range(nsteps):
            ac = env.action_space.sample()
            ob, _, done, _ = env.step(ac)
            if done:
                ob = env.reset()
            obs.append(np.asarray(ob))
        mean = np.mean(obs, 0).astype(np.float32)
        std = np.std(obs, 0).mean().astype(np.float32)
    else:
        mean = np.empty(shape=ob.shape, dtype=np.float32)
        std = np.empty(shape=(), dtype=np.float32)
    MPI.COMM_WORLD.Bcast(mean, root=0)
    MPI.COMM_WORLD.Bcast(std, root=0)
    return mean, std


def layernorm(x):
    m, v = tf.nn.moments(x, -1, keep_dims=True)
    return (x - m) / (tf.sqrt(v) + 1e-8)


getsess = tf.get_default_session
fc = partial(tf.layers.dense, kernel_initializer=normc_initializer(1.))
activ = tf.nn.relu


def flatten_two_dims(x):
    """Merge the first two dimensions: (B, T, ...) -> (B*T, ...)."""
    return tf.reshape(x, [-1] + x.get_shape().as_list()[2:])


def unflatten_first_dim(x, sh):
    """Inverse of flatten_two_dims: (B*T, ...) -> (sh[0], sh[1], ...)."""
    return tf.reshape(x, [sh[0], sh[1]] + x.get_shape().as_list()[1:])


def add_pos_bias(x):
    with tf.variable_scope(name_or_scope=None, default_name="pos_bias"):
        b = tf.get_variable(name="pos_bias", shape=[1] + x.get_shape().as_list()[1:],
                            dtype=tf.float32, initializer=tf.zeros_initializer())
        return x + b
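

# --------------------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original module): a minimal start-up order
# for one-MPI-worker-per-GPU training, assuming the script is launched with something
# like `mpirun -np <n> python train.py`. `build_graph` is a hypothetical placeholder for
# the caller's graph construction; everything else is defined above.
def _example_mpi_session_setup(build_graph):
    """Sketch: pin this rank to a GPU, open a session, and sync variables from rank 0."""
    setup_mpi_gpus()                   # sets CUDA_VISIBLE_DEVICES for this rank
    sess = setup_tensorflow_session()  # thread counts sized to the local CPU count
    with sess.as_default():
        build_graph()                  # hypothetical: caller builds the TF graph here
        sess.run(tf.global_variables_initializer())
        bcast_tf_vars_from_root(sess, tf.global_variables())
    return sess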


def small_convnet(x, nl, feat_dim, last_nl, layernormalize, batchnorm=False):
    # nl=512, feat_dim=None, last_nl=0, layernormalize=0, batchnorm=False
    bn = tf.layers.batch_normalization if batchnorm else lambda x: x
    x = bn(tf.layers.conv2d(x, filters=32, kernel_size=8, strides=(4, 4), activation=nl))
    x = bn(tf.layers.conv2d(x, filters=64, kernel_size=4, strides=(2, 2), activation=nl))
    x = bn(tf.layers.conv2d(x, filters=64, kernel_size=3, strides=(1, 1), activation=nl))
    x = tf.reshape(x, (-1, np.prod(x.get_shape().as_list()[1:])))
    x = bn(fc(x, units=feat_dim, activation=None))
    if last_nl is not None:
        x = last_nl(x)
    if layernormalize:
        x = layernorm(x)
    return x


# new add
class SmallConv(tf.keras.Model):
    """Keras counterpart of small_convnet: three conv layers followed by a dense feature layer."""

    def __init__(self, feat_dim, name=None):
        super(SmallConv, self).__init__(name=name)
        self.conv1 = layers.Conv2D(filters=32, kernel_size=8, strides=(4, 4), activation=tf.nn.leaky_relu)
        self.conv2 = layers.Conv2D(filters=64, kernel_size=4, strides=(2, 2), activation=tf.nn.leaky_relu)
        self.conv3 = layers.Conv2D(filters=64, kernel_size=3, strides=(1, 1), activation=tf.nn.leaky_relu)
        self.fc = layers.Dense(units=feat_dim, activation=None)

    def call(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = tf.reshape(x, (-1, np.prod(x.get_shape().as_list()[1:])))
        x = self.fc(x)
        return x


# new add
class ResBlock(tf.keras.Model):
    """Action-conditioned residual block: returns x plus a residual computed from [x, a]."""

    def __init__(self, hidsize):
        super(ResBlock, self).__init__()
        self.hidsize = hidsize
        self.dense1 = layers.Dense(hidsize, activation=tf.nn.leaky_relu)
        self.dense2 = layers.Dense(hidsize, activation=None)

    def call(self, xs):
        x, a = xs
        res = self.dense1(tf.concat([x, a], axis=-1))
        res = self.dense2(tf.concat([res, a], axis=-1))
        assert x.get_shape().as_list()[-1] == self.hidsize and res.get_shape().as_list()[-1] == self.hidsize
        return x + res


# new add
class TransitionNetwork(tf.keras.Model):
    """Residual MLP over (features, actions) laid out as (batch, time, dim)."""

    def __init__(self, hidsize=256, name=None):
        super(TransitionNetwork, self).__init__(name=name)
        self.hidsize = hidsize
        self.dense1 = layers.Dense(hidsize, activation=tf.nn.leaky_relu)
        self.residual_block1 = ResBlock(hidsize)
        self.residual_block2 = ResBlock(hidsize)
        self.dense2 = layers.Dense(hidsize, activation=None)

    def call(self, xs):
        s, a = xs
        sh = tf.shape(a)  # sh = (None, None, 4)
        assert len(s.get_shape().as_list()) == 3 and s.get_shape().as_list()[-1] in [512, 256]
        assert len(a.get_shape().as_list()) == 3
        x = flatten_two_dims(s)  # shape = (None, 512)
        a = flatten_two_dims(a)  # shape = (None, 4)
        # x = self.dense1(tf.concat([x, a], axis=-1))  # (None, 256)
        x = self.residual_block1([x, a])  # (None, 256)
        x = self.residual_block2([x, a])  # (None, 256)
        x = self.dense2(tf.concat([x, a], axis=-1))  # (None, 256)
        x = unflatten_first_dim(x, sh)  # shape = (None, None, 256)
        return x
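

# --------------------------------------------------------------------------------------
# Illustrative shape sketch (not part of the original module): how SmallConv and
# TransitionNetwork can be chained under the (batch, time, ...) layout used by
# flatten_two_dims / unflatten_first_dim. `_example_transition_forward` and its
# arguments are hypothetical; `hidsize` is set equal to `feat_dim` so the ResBlock
# width asserts hold with the commented-out dense1 projection above.
def _example_transition_forward(obs, acs, feat_dim=512):
    """Sketch: obs is float32 (B, T, H, W, C) with static H/W/C; acs is float32 (B, T, n_actions)."""
    encoder = SmallConv(feat_dim=feat_dim)
    transition = TransitionNetwork(hidsize=feat_dim)
    sh = tf.shape(obs)
    feats = encoder(flatten_two_dims(obs))  # (B*T, feat_dim)
    feats = unflatten_first_dim(feats, sh)  # (B, T, feat_dim)
    return transition([feats, acs])         # (B, T, feat_dim)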


class GenerativeNetworkGaussianFix(tf.keras.Model):
    """Gaussian decoder: maps a 128-d latent to a 512-d mean plus a tiled single learned variance parameter."""

    def __init__(self, hidsize=256, outsize=512, name=None):
        super(GenerativeNetworkGaussianFix, self).__init__(name=name)
        self.outsize = outsize
        self.dense1 = layers.Dense(hidsize, activation=tf.nn.leaky_relu)
        self.dense2 = layers.Dense(outsize, activation=tf.nn.leaky_relu)
        self.var_single = tf.Variable(1.0, trainable=True)
        self.residual_block1 = tf.keras.Sequential([
            layers.Dense(hidsize, activation=tf.nn.leaky_relu),  # 256
            layers.Dense(hidsize, activation=None)
        ])
        self.residual_block2 = tf.keras.Sequential([
            layers.Dense(hidsize, activation=tf.nn.leaky_relu),  # 256
            layers.Dense(hidsize, activation=None)
        ])
        self.residual_block3 = tf.keras.Sequential([
            layers.Dense(outsize, activation=tf.nn.leaky_relu),  # 512
            layers.Dense(outsize, activation=None)
        ])

    def call(self, z):
        sh = tf.shape(z)  # z, sh = (None, None, 128)
        assert z.get_shape().as_list()[-1] == 128 and len(z.get_shape().as_list()) == 3
        z = flatten_two_dims(z)  # shape = (None, 128)
        x = self.dense1(z)  # (None, 256)
        x = x + self.residual_block1(x)  # (None, 256)
        x = x + self.residual_block2(x)  # (None, 256)
        # variance: tile the single scalar to a fixed (16*128, outsize) block,
        # i.e. this assumes a flattened batch of exactly 16*128 samples
        var_tile = tf.tile(tf.expand_dims(tf.expand_dims(self.var_single, axis=0), axis=0),
                           [16 * 128, self.outsize])
        # mean
        x = self.dense2(x)  # (None, 512)
        x = x + self.residual_block3(x)  # (None, 512) mean
        # concat and return
        x = tf.concat([x, var_tile], axis=-1)  # (None, 1024)
        x = unflatten_first_dim(x, sh)  # shape = (None, None, 1024)
        return x


class GenerativeNetworkGaussian(tf.keras.Model):
    """Gaussian decoder: maps a 128-d latent to 1024-d Gaussian parameters (mean and sigma halves)."""

    def __init__(self, hidsize=256, outsize=512, name=None):
        super(GenerativeNetworkGaussian, self).__init__(name=name)
        self.dense1 = layers.Dense(hidsize, activation=tf.nn.leaky_relu)
        self.dense2 = layers.Dense(outsize, activation=tf.nn.leaky_relu)
        self.dense3 = layers.Dense(outsize * 2, activation=tf.nn.leaky_relu)
        self.residual_block1 = tf.keras.Sequential([
            layers.Dense(hidsize, activation=tf.nn.leaky_relu),  # 256
            layers.Dense(hidsize, activation=None)
        ])
        self.residual_block2 = tf.keras.Sequential([
            layers.Dense(hidsize, activation=tf.nn.leaky_relu),  # 256
            layers.Dense(hidsize, activation=None)
        ])
        self.residual_block3 = tf.keras.Sequential([
            layers.Dense(outsize, activation=tf.nn.leaky_relu),  # 512
            layers.Dense(outsize, activation=None)
        ])

    def call(self, z):
        sh = tf.shape(z)  # z, sh = (None, None, 128)
        assert z.get_shape().as_list()[-1] == 128 and len(z.get_shape().as_list()) == 3
        z = flatten_two_dims(z)  # shape = (None, 128)
        x = self.dense1(z)  # (None, 256)
        x = x + self.residual_block1(x)  # (None, 256)
        x = x + self.residual_block2(x)  # (None, 256)
        x = self.dense2(x)  # (None, 512)
        x = x + self.residual_block3(x)  # (None, 512)
        x = self.dense3(x)  # (None, 1024)
        x = unflatten_first_dim(x, sh)  # shape = (None, None, 1024)
        return x


class ProjectionHead(tf.keras.Model):
    """Project 512-d features to a 128-d embedding, with LayerNorm after each dense layer."""

    def __init__(self, name=None):
        super(ProjectionHead, self).__init__(name=name)
        self.dense1 = layers.Dense(256, activation=None)
        self.dense2 = layers.Dense(128, activation=None)
        self.ln1 = layers.LayerNormalization()
        self.ln2 = layers.LayerNormalization()

    def call(self, x, ln=False):
        assert x.get_shape().as_list()[-1] == 512 and len(x.get_shape().as_list()) == 3
        x = flatten_two_dims(x)  # shape = (None, 512)
        x = self.dense1(x)  # shape = (None, 256)
        x = self.ln1(x)  # layer norm
        x = tf.nn.relu(x)  # relu
        x = self.dense2(x)  # shape = (None, 128)
        x = self.ln2(x)
        return x


class ContrastiveHead(tf.keras.Model):
    """Bilinear contrastive head: logits[i, j] = z_a[i] . W . z_pos[j], scaled by temperature."""

    def __init__(self, temperature, z_dim=128, name=None):
        super(ContrastiveHead, self).__init__(name=name)
        self.W = tf.Variable(tf.random.uniform((z_dim, z_dim)), name='W_Contras')
        self.temperature = temperature

    def call(self, z_a_pos):
        z_a, z_pos = z_a_pos
        Wz = tf.linalg.matmul(self.W, z_pos, transpose_b=True)  # (z_dim, B)
        logits = tf.linalg.matmul(z_a, Wz)  # (B, B)
        logits = logits - tf.reduce_max(logits, 1)[:, None]  # stabilize before softmax
        logits = logits * self.temperature
        return logits


def rec_log_prob(rec_params, s_next, min_sigma=1e-2):
    # rec_params.shape = (None, None, 1024)
    distr = normal_parse_params(rec_params, min_sigma)
    log_prob = distr.log_prob(s_next)  # (None, None, 512)
    assert len(log_prob.get_shape().as_list()) == 3 and log_prob.get_shape().as_list()[-1] == 512
    return tf.reduce_sum(log_prob, axis=-1)
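

# --------------------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): how the Gaussian decoder output
# is consumed. GenerativeNetworkGaussian maps a latent z of shape (B, T, 128) to
# (B, T, 1024) mean/sigma parameters, and rec_log_prob scores the true next features
# s_next of shape (B, T, 512) under that Gaussian; its negative mean is one possible
# reconstruction loss. `_example_reconstruction_loss` is hypothetical.
def _example_reconstruction_loss(z, s_next):
    """Sketch: z is float32 (B, T, 128); s_next is float32 (B, T, 512)."""
    decoder = GenerativeNetworkGaussian(hidsize=256, outsize=512)
    rec_params = decoder(z)                      # (B, T, 1024): [mean | sigma params]
    log_prob = rec_log_prob(rec_params, s_next)  # (B, T), summed over the 512 feature dims
    return -tf.reduce_mean(log_prob)             # scalar loss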


def normal_parse_params(params, min_sigma=0.0):
    n = params.shape[0]
    d = params.shape[-1]  # channel
    mu = params[..., :d // 2]
    sigma_params = params[..., d // 2:]
    sigma = tf.math.softplus(sigma_params)
    sigma = tf.clip_by_value(t=sigma, clip_value_min=min_sigma, clip_value_max=1e5)
    distr = tfd.Normal(loc=mu, scale=sigma)
    return distr


def tile_images(array, n_cols=None, max_images=None, div=1):
    if max_images is not None:
        array = array[:max_images]
    if len(array.shape) == 4 and array.shape[3] == 1:
        array = array[:, :, :, 0]
    assert len(array.shape) in [3, 4], "wrong number of dimensions - shape {}".format(array.shape)
    if len(array.shape) == 4:
        assert array.shape[3] == 3, "wrong number of channels- shape {}".format(array.shape)
    if n_cols is None:
        n_cols = max(int(np.sqrt(array.shape[0])) // div * div, div)
    n_rows = int(np.ceil(float(array.shape[0]) / n_cols))

    def cell(i, j):
        ind = i * n_cols + j
        return array[ind] if ind < array.shape[0] else np.zeros(array[0].shape)

    def row(i):
        return np.concatenate([cell(i, j) for j in range(n_cols)], axis=1)

    return np.concatenate([row(i) for i in range(n_rows)], axis=0)


def save_np_as_mp4(frames, filename, frames_per_sec=30):
    print(filename)
    if distutils.spawn.find_executable('avconv') is not None:
        backend = 'avconv'
    elif distutils.spawn.find_executable('ffmpeg') is not None:
        backend = 'ffmpeg'
    else:
        raise NotImplementedError(
            """Found neither the ffmpeg nor avconv executables. On OS X, you can install ffmpeg via
            `brew install ffmpeg`. On most Ubuntu variants, `sudo apt-get install ffmpeg` should do it.
            On Ubuntu 14.04, however, you'll need to install avconv with `sudo apt-get install libav-tools`.""")
    h, w = frames[0].shape[:2]
    output_path = filename
    cmdline = (backend,
               '-nostats',
               '-loglevel', 'error',  # suppress warnings
               '-y',
               '-r', '%d' % frames_per_sec,
               # input
               '-f', 'rawvideo',
               '-s:v', '{}x{}'.format(w, h),
               '-pix_fmt', 'rgb24',
               '-i', '-',  # this used to be /dev/stdin, which is not Windows-friendly
               # output
               '-vcodec', 'libx264',
               '-pix_fmt', 'yuv420p',
               output_path)
    print('saving ', output_path)
    if hasattr(os, 'setsid'):  # setsid not present on Windows
        process = subprocess.Popen(cmdline, stdin=subprocess.PIPE, preexec_fn=os.setsid)
    else:
        process = subprocess.Popen(cmdline, stdin=subprocess.PIPE)
    process.stdin.write(np.array(frames).tobytes())
    process.stdin.close()
    ret = process.wait()
    if ret != 0:
        print("VideoRecorder encoder exited with status {}".format(ret))
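

# --------------------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): tile_images and save_np_as_mp4
# combine naturally for debugging rollouts. `frames_by_step` is a hypothetical list in
# which each element is a uint8 array of shape (n_envs, H, W, 3); n_envs should fill the
# grid exactly (e.g. a square number) so the tiled result stays uint8 for rgb24 encoding.
# The output path and frame rate are arbitrary.
def _example_save_rollout_video(frames_by_step, path='rollout.mp4'):
    """Sketch: write one mp4 whose frames are grids of per-env observations."""
    tiled = [tile_images(step_frames) for step_frames in frames_by_step]
    save_np_as_mp4(tiled, path, frames_per_sec=15)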


# ExponentialSchedule
class ExponentialSchedule(object):
    def __init__(self, start_value, decay_factor, end_value, outside_value=None):
        """Exponential schedule.

        value(t) = max(start_value * (1.0 - decay_factor) ** (t / 1e5), end_value)

        `outside_value` is currently unused.
        """
        assert 0.0 <= decay_factor <= 1.0
        self.start_value = start_value
        self.decay_factor = decay_factor
        self.end_value = end_value

    def value(self, t):
        v = self.start_value * np.power(1.0 - self.decay_factor, t / int(1e5))
        return np.maximum(v, self.end_value)


class FrameStack(gym.Wrapper):
    """Stack the last k observations along the first (channel) axis."""

    def __init__(self, env, k):
        gym.Wrapper.__init__(self, env)
        self._k = k
        self._frames = deque([], maxlen=k)
        shp = env.observation_space.shape
        self.observation_space = gym.spaces.Box(
            low=0,
            high=1,
            shape=((shp[0] * k,) + shp[1:]),
            dtype=env.observation_space.dtype
        )
        self._max_episode_steps = env._max_episode_steps

    def reset(self):
        obs = self.env.reset()
        for _ in range(self._k):
            self._frames.append(obs)
        return self._get_obs()

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        self._frames.append(obs)
        return self._get_obs(), reward, done, info

    def _get_obs(self):
        assert len(self._frames) == self._k
        return np.concatenate(list(self._frames), axis=0)
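

# --------------------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): FrameStack stacks along the
# first observation axis, so it assumes channel-first image observations (C, H, W) and
# an env that already exposes `_max_episode_steps` (e.g. via a TimeLimit-style wrapper).
# `make_env` is a hypothetical factory.
def _example_frame_stack(make_env, k=3):
    """Sketch: wrap an env so observations become (C * k, H, W)."""
    env = FrameStack(make_env(), k=k)
    ob = env.reset()  # shape: (C * k, H, W)
    ob, reward, done, info = env.step(env.action_space.sample())
    return env, ob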