Curiosity/DPI/utils.py

import os
import random
import pickle
import numpy as np
from collections import deque

import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter


import gym
import dmc2gym

import cv2
from PIL import Image
from typing import Iterable


class eval_mode(object):
    def __init__(self, *models):
        self.models = models

    def __enter__(self):
        self.prev_states = []
        for model in self.models:
            self.prev_states.append(model.training)
            model.train(False)

    def __exit__(self, *args):
        for model, state in zip(self.models, self.prev_states):
            model.train(state)
        return False


def soft_update_params(net, target_net, tau):
    for param, target_param in zip(net.parameters(), target_net.parameters()):
        target_param.data.copy_(
            tau * param.data + (1 - tau) * target_param.data
        )


def set_seed_everywhere(seed):
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)


def module_hash(module):
    result = 0
    for tensor in module.state_dict().values():
        result += tensor.sum().item()
    return result


def make_dir(dir_path):
    try:
        os.mkdir(dir_path)
    except OSError:
        pass
    return dir_path


class FrameStack(gym.Wrapper):
    def __init__(self, env, k):
        gym.Wrapper.__init__(self, env)
        self._k = k
        self._frames = deque([], maxlen=k)
        shp = env.observation_space.shape
        self.observation_space = gym.spaces.Box(
            low=0,
            high=1,
            shape=((shp[0] * k,) + shp[1:]),
            dtype=env.observation_space.dtype
        )
        self._max_episode_steps = env._max_episode_steps

    def reset(self):
        obs = self.env.reset()
        for _ in range(self._k):
            self._frames.append(obs)
        return self._get_obs()

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        self._frames.append(obs)
        return self._get_obs(), reward, done, info

    def _get_obs(self):
        assert len(self._frames) == self._k
        return np.concatenate(list(self._frames), axis=0)


class ActionRepeat:
    def __init__(self, env, amount):
        self._env = env
        self._amount = amount

    def __getattr__(self, name):
        return getattr(self._env, name)

    def step(self, action):
        done = False
        total_reward = 0
        current_step = 0
        while current_step < self._amount and not done:
            obs, reward, done, info = self._env.step(action)
            total_reward += reward
            current_step += 1
        return obs, total_reward, done, info


class NormalizeActions:
    def __init__(self, env):
        self._env = env
        self._mask = np.logical_and(
            np.isfinite(env.action_space.low),
            np.isfinite(env.action_space.high))
        self._low = np.where(self._mask, env.action_space.low, -1)
        self._high = np.where(self._mask, env.action_space.high, 1)

    def __getattr__(self, name):
        return getattr(self._env, name)

    @property
    def action_space(self):
        low = np.where(self._mask, -np.ones_like(self._low), self._low)
        high = np.where(self._mask, np.ones_like(self._low), self._high)
        return gym.spaces.Box(low, high, dtype=np.float32)

    def step(self, action):
        original = (action + 1) / 2 * (self._high - self._low) + self._low
        original = np.where(self._mask, original, action)
        return self._env.step(original)

class TimeLimit:

    def __init__(self, env, duration):
        self._env = env
        self._duration = duration
        self._step = None

    def __getattr__(self, name):
        return getattr(self._env, name)

    def step(self, action):
        assert self._step is not None, 'Must reset environment.'
        obs, reward, done, info = self._env.step(action)
        self._step += 1
        if self._step >= self._duration:
          done = True
          if 'discount' not in info:
            info['discount'] = np.array(1.0).astype(np.float32)
          self._step = None
        return obs, reward, done, info

    def reset(self):
        self._step = 0
        return self._env.reset()


class ReplayBuffer:

    def __init__(self, size, obs_shape, action_size, seq_len, batch_size, args):

        self.size = size
        self.obs_shape = obs_shape
        self.action_size = action_size
        self.seq_len = seq_len
        self.batch_size = batch_size
        self.idx = 0
        self.full = False
        self.observations = np.empty((size, *obs_shape), dtype=np.uint8) 
        self.next_observations = np.empty((size, *obs_shape), dtype=np.uint8)
        self.actions = np.empty((size, action_size), dtype=np.float32)
        self.rewards = np.empty((size,), dtype=np.float32) 
        self.terminals = np.empty((size,), dtype=np.float32)
        self.steps, self.episodes = 0, 0
        self.episode_count = np.zeros((size,), dtype=np.int32)
    
    def add(self, obs, ac, next_obs, rew, done, episode_count):
        self.observations[self.idx] = obs
        self.next_observations[self.idx] = next_obs
        self.actions[self.idx] = ac
        self.rewards[self.idx] = rew
        self.terminals[self.idx] = done
        self.full = self.full or self.idx == 0
        self.steps += 1
        self.episodes = self.episodes + (1 if done else 0)
        self.episode_count[self.idx] = episode_count
        self.idx = (self.idx + 1) % self.size

    def _sample_idx(self, L):
        valid_idx = False
        while not valid_idx:
            idx = np.random.randint(0, self.size if self.full else self.idx - L)
            idxs = np.arange(idx, idx + L) % self.size
            valid_idx = not self.idx in idxs[1:] 
        return idxs

    def _retrieve_batch(self, idxs, n, L):        
        vec_idxs = idxs.transpose().reshape(-1)  # Unroll indices
        observations = self.observations[vec_idxs]
        next_obs = self.next_observations[vec_idxs]
        obs = observations.reshape(L, n, *observations.shape[1:])
        next_obs = next_obs.reshape(L, n, *next_obs.shape[1:])
        acs = self.actions[vec_idxs].reshape(L, n, -1)
        rew = self.rewards[vec_idxs].reshape(L, n)
        term = self.terminals[vec_idxs].reshape(L, n)
        return obs, acs, next_obs, rew, term

    def sample(self):
        n = self.batch_size
        l = self.seq_len
        obs,acs,next_obs,rews,terms= self._retrieve_batch(np.asarray([self._sample_idx(l) for _ in range(n)]), n, l)
        return obs,acs,next_obs,rews,terms
    

class ReplayBuffer1:
    def __init__(self, size, obs_shape, action_size, seq_len, batch_size, args):
        self.size = size
        self.obs_shape = obs_shape
        self.action_size = action_size
        self.seq_len = seq_len
        self.batch_size = batch_size
        self.idx = 0
        self.full = False
        self.args = args
        self.observations = np.empty((size, *obs_shape), dtype=np.uint8)
        self.actions = np.empty((size, action_size), dtype=np.float32)
        self.rewards = np.empty((size,1), dtype=np.float32)
        self.next_observations = np.empty((size, *obs_shape), dtype=np.uint8)
        self.episode_count = np.zeros((size,), dtype=np.uint8) 
        self.terminals = np.empty((size,), dtype=np.float32)
        self.steps, self.episodes = 0, 0
    
    def add(self, obs, ac, next_obs, rew, episode_count, done):
        self.observations[self.idx] = obs
        self.actions[self.idx] = ac
        self.next_observations[self.idx] = next_obs
        self.rewards[self.idx] = rew
        self.episode_count[self.idx] = episode_count
        self.terminals[self.idx] = done
        self.idx = (self.idx + 1) % self.size
        self.full = self.full or self.idx == 0
        self.steps += 1 
        self.episodes = self.episodes + (1 if done else 0)

    def _sample_idx(self, L):
        valid_idx = False
        while not valid_idx:
            idx = np.random.randint(0, self.size if self.full else self.idx - L)
            idxs = np.arange(idx, idx + L) % self.size
            valid_idx = not self.idx in idxs[1:] 
        return idxs

    def _retrieve_batch(self, idxs, n, L):
        vec_idxs = idxs.transpose().reshape(-1)  # Unroll indices
        observations = self.observations[vec_idxs]
        next_observations = self.next_observations[vec_idxs]
        return observations.reshape(L, n, *observations.shape[1:]), self.actions[vec_idxs].reshape(L, n, -1), observations.reshape(L, n, *next_observations.shape[1:]), \
                self.rewards[vec_idxs].reshape(L, n), self.terminals[vec_idxs].reshape(L, n)

    def sample(self):
        n = self.batch_size
        l = self.seq_len
        obs,acs,rews,terms= self._retrieve_batch(np.asarray([self._sample_idx(l) for _ in range(n)]), n, l)
        return obs,acs,rews,terms
    
    def group_steps(self, buffer, variable, obs=True):
        variable = getattr(buffer, variable)
        non_zero_indices = np.nonzero(buffer.episode_count)[0]
        print(buffer.episode_count)
        variable = variable[non_zero_indices]
        print(variable.shape)
        exit()

        if obs:
            variable = variable.reshape(-1, self.args.episode_length,
                                        self.args.frame_stack*self.args.channels,
                                        self.args.image_size,self.args.image_size).transpose(1, 0, 2, 3, 4)
        else:
            variable = variable.reshape(variable.shape[0]//self.args.episode_length, self.args.episode_length, -1).transpose(1, 0, 2)
        return variable

    def transform_grouped_steps(self, variable):
        variable = variable.transpose((1, 0, 2, 3, 4))
        variable = variable.reshape(self.args.batch_size*self.args.episode_length,self.args.frame_stack*self.args.channels,
                                    self.args.image_size,self.args.image_size)
        return variable
    
    def sample_random_idx(self, buffer_length, last=False):
        init = 0 if last else buffer_length - self.args.batch_size
        random_indices = random.sample(range(init, buffer_length), self.args.batch_size)
        return random_indices

    def group_and_sample_random_batch(self, buffer, variable_name, device, random_indices, is_obs=True, offset=0):
        if offset == 0:
            variable_tensor = torch.tensor(self.group_steps(buffer,variable_name, is_obs)).float()[:self.args.episode_length-1].to(device)
        else:
            variable_tensor = torch.tensor(self.group_steps(buffer,variable_name, is_obs)).float()[offset:].to(device)
        return variable_tensor[:,random_indices,:,:,:] if is_obs else variable_tensor[:,random_indices,:]

def make_env(args):
    # For making ground plane transparent, change rgba to (0, 0, 0, 0) in local_dm_control_suite/{domain_name}.xml, 
    # else change to (0.5, 0.5, 0.5, 1.0) for default ground plane color
    # https://mujoco.readthedocs.io/en/stable/XMLreference.html#body-geom
    env = dmc2gym.make(
            domain_name=args.domain_name,
            task_name=args.task_name,
            resource_files=args.resource_files,
            img_source=args.img_source,
            total_frames=args.total_frames,
            seed=args.seed,
            visualize_reward=False,
            from_pixels=(args.encoder_type == 'pixel'),
            height=args.image_size,
            width=args.image_size,
            frame_skip=args.action_repeat,
            video_recording=args.save_video,
            video_recording_dir=args.work_dir,
            version=args.version,
        )
    return env

def shuffle_along_axis(a, axis):
    idx = np.random.rand(*a.shape).argsort(axis=axis)
    return np.take_along_axis(a,idx,axis=axis)

def preprocess_obs(obs):
    obs = (obs/255.0) - 0.5
    return obs

def soft_update_params(net, target_net, tau):
    for param, target_param in zip(net.parameters(), target_net.parameters()):
        target_param.data.copy_(
            tau * param.detach().data + (1 - tau) * target_param.data
        )

def save_image(array, filename):
    array = array.transpose(1, 2, 0)
    array = ((array+0.5) * 255).astype(np.uint8)
    image = Image.fromarray(array)
    image.save(filename)

def video_from_array(arr, high_noise, filename):
    """
    Save a video from a numpy array of shape (T, H, W, C)
    Example:
        video_from_array(np.random.rand(100, 64, 64, 1), 'test.mp4')
    """
    if arr.shape[-1] == 1:
            height, width, channels = arr.shape[1:]
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter('output.mp4', fourcc, 30.0, (width, height))
            for i in range(arr.shape[0]):
                frame = arr[i]
                frame = np.uint8(frame)
                frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR)
                out.write(frame)
            out.release()

def save_video(images):
    """
    Image shape is (T, C, H, W)
    Example:(50, 3, 84, 84)
    """
    output_file = "output.avi"
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    fps = 2
    height, width, channels = 84,84,3
    out = cv2.VideoWriter(output_file, fourcc, fps, (width, height))
    for image in images:
        image = np.uint8(image.transpose((1, 2, 0)))
        out.write(image)
    out.release()

class CorruptVideos:
    def __init__(self, dir_path):
        self.dir_path = dir_path

    def _is_video_corrupt(self,filepath):
        """
        Check if a video file is corrupt.

        Args:
            dir_path (str): Path to the video file.

        Returns:
            bool: True if the video is corrupt, False otherwise.
        """
        # Open the video file
        cap = cv2.VideoCapture(filepath)
        if not cap.isOpened():
            return True
        ret, frame = cap.read()
        if not ret:
            return True
        cap.release()
        return False

    def _delete_corrupt_video(self, filepath):
        os.remove(filepath)

    def is_video_corrupt(self, delete=False):
        for filename in os.listdir(self.dir_path):
            filepath = os.path.join(self.dir_path, filename)
            if filepath.endswith(".mp4"):
                if self._is_video_corrupt(filepath):
                    print(f"{filepath} is corrupt.")
                    if delete:
                        self._delete_corrupt_video(filepath)
                        print(f"Deleted {filepath}")


def get_parameters(modules: Iterable[nn.Module]):
    """
    Given a list of torch modules, returns a list of their parameters.
    :param modules: iterable of modules
    :returns: a list of parameters
    """
    model_parameters = []
    for module in modules:
        model_parameters += list(module.parameters())
    return model_parameters

class FreezeParameters:
    def __init__(self, modules: Iterable[nn.Module]):
        """
        Context manager to locally freeze gradients.
        In some cases with can speed up computation because gradients aren't calculated for these listed modules.
        example:
        ```
        with FreezeParameters([module]):
          output_tensor = module(input_tensor)
        ```
        :param modules: iterable of modules. used to call .parameters() to freeze gradients.
        """
        self.modules = modules
        self.param_states = [p.requires_grad for p in get_parameters(self.modules)]

    def __enter__(self):

        for param in get_parameters(self.modules):
            param.requires_grad = False

    def __exit__(self, exc_type, exc_val, exc_tb):
        for i, param in enumerate(get_parameters(self.modules)):
            param.requires_grad = self.param_states[i]

class Logger:

    def __init__(self, log_dir, n_logged_samples=10, summary_writer=None):
        self._log_dir = log_dir
        print('########################')
        print('logging outputs to ', log_dir)
        print('########################')
        self._n_logged_samples = n_logged_samples
        self._summ_writer = SummaryWriter(log_dir, flush_secs=1, max_queue=1)

    def log_scalar(self, scalar, name, step_):
        self._summ_writer.add_scalar('{}'.format(name), scalar, step_)

    def log_scalars(self, scalar_dict, step):
        for key, value in scalar_dict.items():
            print('{} : {}'.format(key, value))
            self.log_scalar(value, key, step)
        self.dump_scalars_to_pickle(scalar_dict, step)

    def log_videos(self, videos, step, max_videos_to_save=1, fps=20, video_title='video'):

        # max rollout length
        max_videos_to_save = np.min([max_videos_to_save, videos.shape[0]])
        max_length = videos[0].shape[0]
        for i in range(max_videos_to_save):
            if videos[i].shape[0]>max_length:
                max_length = videos[i].shape[0]

        # pad rollouts to all be same length
        for i in range(max_videos_to_save):
            if videos[i].shape[0]<max_length:
                padding = np.tile([videos[i][-1]], (max_length-videos[i].shape[0],1,1,1))
                videos[i] = np.concatenate([videos[i], padding], 0)

            clip = mpy.ImageSequenceClip(list(videos[i]), fps=fps)
            new_video_title = video_title+'{}_{}'.format(step, i) + '.gif'
            filename = os.path.join(self._log_dir, new_video_title)
            video.write_gif(filename, fps =fps)


    def dump_scalars_to_pickle(self, metrics, step, log_title=None):
        log_path = os.path.join(self._log_dir, "scalar_data.pkl" if log_title is None else log_title)
        with open(log_path, 'ab') as f:
            pickle.dump({'step': step, **dict(metrics)}, f)

    def flush(self):
        self._summ_writer.flush()
Adding Denoised Predictive Imagination 2023-03-23 14:05:28 +00:00			`import os`
Adding high noise by randomising the frames 2023-03-25 16:07:07 +00:00			`import random`
Adding after some changes 2023-04-22 11:07:22 +00:00			`import pickle`
Adding Denoised Predictive Imagination 2023-03-23 14:05:28 +00:00			`import numpy as np`
Adding high noise by randomising the frames 2023-03-25 16:07:07 +00:00			`from collections import deque`

			`import torch`
Adding Denoised Predictive Imagination 2023-03-23 14:05:28 +00:00			`import torch.nn as nn`
Adding after some changes 2023-04-22 11:07:22 +00:00			`from torch.utils.tensorboard import SummaryWriter`

Adding Denoised Predictive Imagination 2023-03-23 14:05:28 +00:00
			`import gym`
			`import dmc2gym`

Adding high noise by randomising the frames 2023-03-25 16:07:07 +00:00			`import cv2`
Implementing ICLUB 2023-03-24 19:39:14 +00:00			`from PIL import Image`
Add a class to freeze parameters 2023-04-10 18:17:44 +00:00			`from typing import Iterable`
Adding Denoised Predictive Imagination 2023-03-23 14:05:28 +00:00

			`class eval_mode(object):`
			`def __init__(self, *models):`
			`self.models = models`

			`def __enter__(self):`
			`self.prev_states = []`
			`for model in self.models:`
			`self.prev_states.append(model.training)`
			`model.train(False)`

			`def __exit__(self, *args):`
			`for model, state in zip(self.models, self.prev_states):`
			`model.train(state)`
			`return False`


			`def soft_update_params(net, target_net, tau):`
			`for param, target_param in zip(net.parameters(), target_net.parameters()):`
			`target_param.data.copy_(`
			`tau * param.data + (1 - tau) * target_param.data`
			`)`


			`def set_seed_everywhere(seed):`
			`torch.manual_seed(seed)`
			`if torch.cuda.is_available():`
			`torch.cuda.manual_seed_all(seed)`
			`np.random.seed(seed)`
			`random.seed(seed)`


			`def module_hash(module):`
			`result = 0`
			`for tensor in module.state_dict().values():`
			`result += tensor.sum().item()`
			`return result`


			`def make_dir(dir_path):`
			`try:`
			`os.mkdir(dir_path)`
			`except OSError:`
			`pass`
			`return dir_path`



			`class FrameStack(gym.Wrapper):`
			`def __init__(self, env, k):`
			`gym.Wrapper.__init__(self, env)`
			`self._k = k`
			`self._frames = deque([], maxlen=k)`
			`shp = env.observation_space.shape`
			`self.observation_space = gym.spaces.Box(`
			`low=0,`
			`high=1,`
			`shape=((shp[0] * k,) + shp[1:]),`
			`dtype=env.observation_space.dtype`
			`)`
			`self._max_episode_steps = env._max_episode_steps`

			`def reset(self):`
			`obs = self.env.reset()`
			`for _ in range(self._k):`
			`self._frames.append(obs)`
			`return self._get_obs()`

			`def step(self, action):`
			`obs, reward, done, info = self.env.step(action)`
			`self._frames.append(obs)`
			`return self._get_obs(), reward, done, info`

			`def _get_obs(self):`
			`assert len(self._frames) == self._k`
			`return np.concatenate(list(self._frames), axis=0)`


Adding Environment Wrapper and including index randomization for trajectory selection 2023-04-13 16:41:15 +00:00			`class ActionRepeat:`
			`def __init__(self, env, amount):`
			`self._env = env`
			`self._amount = amount`

			`def __getattr__(self, name):`
			`return getattr(self._env, name)`

			`def step(self, action):`
			`done = False`
			`total_reward = 0`
			`current_step = 0`
			`while current_step < self._amount and not done:`
			`obs, reward, done, info = self._env.step(action)`
			`total_reward += reward`
			`current_step += 1`
			`return obs, total_reward, done, info`


			`class NormalizeActions:`
			`def __init__(self, env):`
			`self._env = env`
			`self._mask = np.logical_and(`
			`np.isfinite(env.action_space.low),`
			`np.isfinite(env.action_space.high))`
			`self._low = np.where(self._mask, env.action_space.low, -1)`
			`self._high = np.where(self._mask, env.action_space.high, 1)`

			`def __getattr__(self, name):`
			`return getattr(self._env, name)`

			`@property`
			`def action_space(self):`
			`low = np.where(self._mask, -np.ones_like(self._low), self._low)`
			`high = np.where(self._mask, np.ones_like(self._low), self._high)`
			`return gym.spaces.Box(low, high, dtype=np.float32)`

			`def step(self, action):`
			`original = (action + 1) / 2 * (self._high - self._low) + self._low`
			`original = np.where(self._mask, original, action)`
			`return self._env.step(original)`

Adding after some changes 2023-04-22 11:07:22 +00:00			`class TimeLimit:`

			`def __init__(self, env, duration):`
			`self._env = env`
			`self._duration = duration`
			`self._step = None`

			`def __getattr__(self, name):`
			`return getattr(self._env, name)`

			`def step(self, action):`
			`assert self._step is not None, 'Must reset environment.'`
			`obs, reward, done, info = self._env.step(action)`
			`self._step += 1`
			`if self._step >= self._duration:`
			`done = True`
			`if 'discount' not in info:`
			`info['discount'] = np.array(1.0).astype(np.float32)`
			`self._step = None`
			`return obs, reward, done, info`

			`def reset(self):`
			`self._step = 0`
			`return self._env.reset()`

Adding Environment Wrapper and including index randomization for trajectory selection 2023-04-13 16:41:15 +00:00
Adding Denoised Predictive Imagination 2023-03-23 14:05:28 +00:00			`class ReplayBuffer:`
Adding after some changes 2023-04-22 11:07:22 +00:00
			`def __init__(self, size, obs_shape, action_size, seq_len, batch_size, args):`

			`self.size = size`
			`self.obs_shape = obs_shape`
			`self.action_size = action_size`
			`self.seq_len = seq_len`
			`self.batch_size = batch_size`
			`self.idx = 0`
			`self.full = False`
			`self.observations = np.empty((size, *obs_shape), dtype=np.uint8)`
			`self.next_observations = np.empty((size, *obs_shape), dtype=np.uint8)`
			`self.actions = np.empty((size, action_size), dtype=np.float32)`
			`self.rewards = np.empty((size,), dtype=np.float32)`
			`self.terminals = np.empty((size,), dtype=np.float32)`
			`self.steps, self.episodes = 0, 0`
			`self.episode_count = np.zeros((size,), dtype=np.int32)`

			`def add(self, obs, ac, next_obs, rew, done, episode_count):`
			`self.observations[self.idx] = obs`
			`self.next_observations[self.idx] = next_obs`
			`self.actions[self.idx] = ac`
			`self.rewards[self.idx] = rew`
			`self.terminals[self.idx] = done`
			`self.full = self.full or self.idx == 0`
			`self.steps += 1`
			`self.episodes = self.episodes + (1 if done else 0)`
			`self.episode_count[self.idx] = episode_count`
			`self.idx = (self.idx + 1) % self.size`

			`def _sample_idx(self, L):`
			`valid_idx = False`
			`while not valid_idx:`
			`idx = np.random.randint(0, self.size if self.full else self.idx - L)`
			`idxs = np.arange(idx, idx + L) % self.size`
			`valid_idx = not self.idx in idxs[1:]`
			`return idxs`

			`def _retrieve_batch(self, idxs, n, L):`
			`vec_idxs = idxs.transpose().reshape(-1) # Unroll indices`
			`observations = self.observations[vec_idxs]`
			`next_obs = self.next_observations[vec_idxs]`
			`obs = observations.reshape(L, n, *observations.shape[1:])`
			`next_obs = next_obs.reshape(L, n, *next_obs.shape[1:])`
			`acs = self.actions[vec_idxs].reshape(L, n, -1)`
			`rew = self.rewards[vec_idxs].reshape(L, n)`
			`term = self.terminals[vec_idxs].reshape(L, n)`
			`return obs, acs, next_obs, rew, term`

			`def sample(self):`
			`n = self.batch_size`
			`l = self.seq_len`
			`obs,acs,next_obs,rews,terms= self._retrieve_batch(np.asarray([self._sample_idx(l) for _ in range(n)]), n, l)`
			`return obs,acs,next_obs,rews,terms`


			`class ReplayBuffer1:`
Implementing ICLUB 2023-03-24 19:39:14 +00:00			`def __init__(self, size, obs_shape, action_size, seq_len, batch_size, args):`
Adding Denoised Predictive Imagination 2023-03-23 14:05:28 +00:00			`self.size = size`
			`self.obs_shape = obs_shape`
			`self.action_size = action_size`
			`self.seq_len = seq_len`
			`self.batch_size = batch_size`
			`self.idx = 0`
			`self.full = False`
Implementing ICLUB 2023-03-24 19:39:14 +00:00			`self.args = args`
Adding Denoised Predictive Imagination 2023-03-23 14:05:28 +00:00			`self.observations = np.empty((size, *obs_shape), dtype=np.uint8)`
			`self.actions = np.empty((size, action_size), dtype=np.float32)`
Adding Rewards 2023-04-12 07:33:19 +00:00			`self.rewards = np.empty((size,1), dtype=np.float32)`
Adding Denoised Predictive Imagination 2023-03-23 14:05:28 +00:00			`self.next_observations = np.empty((size, *obs_shape), dtype=np.uint8)`
			`self.episode_count = np.zeros((size,), dtype=np.uint8)`
			`self.terminals = np.empty((size,), dtype=np.float32)`
			`self.steps, self.episodes = 0, 0`

Adding Rewards 2023-04-12 07:33:19 +00:00			`def add(self, obs, ac, next_obs, rew, episode_count, done):`
Adding Denoised Predictive Imagination 2023-03-23 14:05:28 +00:00			`self.observations[self.idx] = obs`
			`self.actions[self.idx] = ac`
			`self.next_observations[self.idx] = next_obs`
Adding Rewards 2023-04-12 07:33:19 +00:00			`self.rewards[self.idx] = rew`
Adding Denoised Predictive Imagination 2023-03-23 14:05:28 +00:00			`self.episode_count[self.idx] = episode_count`
			`self.terminals[self.idx] = done`
			`self.idx = (self.idx + 1) % self.size`
			`self.full = self.full or self.idx == 0`
			`self.steps += 1`
			`self.episodes = self.episodes + (1 if done else 0)`

			`def _sample_idx(self, L):`
			`valid_idx = False`
			`while not valid_idx:`
			`idx = np.random.randint(0, self.size if self.full else self.idx - L)`
			`idxs = np.arange(idx, idx + L) % self.size`
			`valid_idx = not self.idx in idxs[1:]`
			`return idxs`

			`def _retrieve_batch(self, idxs, n, L):`
			`vec_idxs = idxs.transpose().reshape(-1) # Unroll indices`
			`observations = self.observations[vec_idxs]`
			`next_observations = self.next_observations[vec_idxs]`
			`return observations.reshape(L, n, observations.shape[1:]), self.actions[vec_idxs].reshape(L, n, -1), observations.reshape(L, n, next_observations.shape[1:]), \`
			`self.rewards[vec_idxs].reshape(L, n), self.terminals[vec_idxs].reshape(L, n)`

			`def sample(self):`
			`n = self.batch_size`
			`l = self.seq_len`
			`obs,acs,rews,terms= self._retrieve_batch(np.asarray([self._sample_idx(l) for _ in range(n)]), n, l)`
			`return obs,acs,rews,terms`

Grouping for actions too 2023-03-27 17:22:47 +00:00			`def group_steps(self, buffer, variable, obs=True):`
Implementing ICLUB 2023-03-24 19:39:14 +00:00			`variable = getattr(buffer, variable)`
			`non_zero_indices = np.nonzero(buffer.episode_count)[0]`
Adding after some changes 2023-04-22 11:07:22 +00:00			`print(buffer.episode_count)`
Implementing ICLUB 2023-03-24 19:39:14 +00:00			`variable = variable[non_zero_indices]`
Adding after some changes 2023-04-22 11:07:22 +00:00			`print(variable.shape)`
			`exit()`

Grouping for actions too 2023-03-27 17:22:47 +00:00			`if obs:`
Adding Environment Wrapper and including index randomization for trajectory selection 2023-04-13 16:41:15 +00:00			`variable = variable.reshape(-1, self.args.episode_length,`
Changing variable reshaping strategy 2023-04-09 16:22:12 +00:00			`self.args.frame_stack*self.args.channels,`
			`self.args.image_size,self.args.image_size).transpose(1, 0, 2, 3, 4)`
Grouping for actions too 2023-03-27 17:22:47 +00:00			`else:`
Adding Environment Wrapper and including index randomization for trajectory selection 2023-04-13 16:41:15 +00:00			`variable = variable.reshape(variable.shape[0]//self.args.episode_length, self.args.episode_length, -1).transpose(1, 0, 2)`
Implementing ICLUB 2023-03-24 19:39:14 +00:00			`return variable`

			`def transform_grouped_steps(self, variable):`
			`variable = variable.transpose((1, 0, 2, 3, 4))`
			`variable = variable.reshape(self.args.batch_sizeself.args.episode_length,self.args.frame_stackself.args.channels,`
			`self.args.image_size,self.args.image_size)`
			`return variable`

Adding after some changes 2023-04-22 11:07:22 +00:00			`def sample_random_idx(self, buffer_length, last=False):`
			`init = 0 if last else buffer_length - self.args.batch_size`
			`random_indices = random.sample(range(init, buffer_length), self.args.batch_size)`
Adding Environment Wrapper and including index randomization for trajectory selection 2023-04-13 16:41:15 +00:00			`return random_indices`

			`def group_and_sample_random_batch(self, buffer, variable_name, device, random_indices, is_obs=True, offset=0):`
			`if offset == 0:`
			`variable_tensor = torch.tensor(self.group_steps(buffer,variable_name, is_obs)).float()[:self.args.episode_length-1].to(device)`
			`else:`
			`variable_tensor = torch.tensor(self.group_steps(buffer,variable_name, is_obs)).float()[offset:].to(device)`
			`return variable_tensor[:,random_indices,:,:,:] if is_obs else variable_tensor[:,random_indices,:]`
Adding Denoised Predictive Imagination 2023-03-23 14:05:28 +00:00
			`def make_env(args):`
Adding new background videos for each episode 2023-03-25 13:18:07 +00:00			`# For making ground plane transparent, change rgba to (0, 0, 0, 0) in local_dm_control_suite/{domain_name}.xml,`
			`# else change to (0.5, 0.5, 0.5, 1.0) for default ground plane color`
			`# https://mujoco.readthedocs.io/en/stable/XMLreference.html#body-geom`
Adding Denoised Predictive Imagination 2023-03-23 14:05:28 +00:00			`env = dmc2gym.make(`
			`domain_name=args.domain_name,`
			`task_name=args.task_name,`
			`resource_files=args.resource_files,`
			`img_source=args.img_source,`
			`total_frames=args.total_frames,`
			`seed=args.seed,`
			`visualize_reward=False,`
			`from_pixels=(args.encoder_type == 'pixel'),`
			`height=args.image_size,`
			`width=args.image_size,`
Adding high noise by randomising the frames 2023-03-25 16:07:07 +00:00			`frame_skip=args.action_repeat,`
			`video_recording=args.save_video,`
			`video_recording_dir=args.work_dir,`
Changing names for clean and noisy environments via version 2023-03-28 18:21:58 +00:00			`version=args.version,`
Adding Denoised Predictive Imagination 2023-03-23 14:05:28 +00:00			`)`
Implementing ICLUB 2023-03-24 19:39:14 +00:00			`return env`

Adding after some changes 2023-04-22 11:07:22 +00:00			`def shuffle_along_axis(a, axis):`
Adding some new things 2023-04-24 15:42:37 +00:00			`idx = np.random.rand(*a.shape).argsort(axis=axis)`
			`return np.take_along_axis(a,idx,axis=axis)`
Adding after some changes 2023-04-22 11:07:22 +00:00
Adding preprocessing function 2023-04-12 15:16:47 +00:00			`def preprocess_obs(obs):`
Adding after some changes 2023-04-22 11:07:22 +00:00			`obs = (obs/255.0) - 0.5`
Adding preprocessing function 2023-04-12 15:16:47 +00:00			`return obs`

Add a class to freeze parameters 2023-04-10 18:17:44 +00:00			`def soft_update_params(net, target_net, tau):`
			`for param, target_param in zip(net.parameters(), target_net.parameters()):`
			`target_param.data.copy_(`
Adding after some changes 2023-04-22 11:07:22 +00:00			`tau * param.detach().data + (1 - tau) * target_param.data`
Add a class to freeze parameters 2023-04-10 18:17:44 +00:00			`)`

Implementing ICLUB 2023-03-24 19:39:14 +00:00			`def save_image(array, filename):`
			`array = array.transpose(1, 2, 0)`
Adding after some changes 2023-04-22 11:07:22 +00:00			`array = ((array+0.5) * 255).astype(np.uint8)`
Implementing ICLUB 2023-03-24 19:39:14 +00:00			`image = Image.fromarray(array)`
Adding high noise by randomising the frames 2023-03-25 16:07:07 +00:00			`image.save(filename)`

			`def video_from_array(arr, high_noise, filename):`
			`"""`
			`Save a video from a numpy array of shape (T, H, W, C)`
			`Example:`
			`video_from_array(np.random.rand(100, 64, 64, 1), 'test.mp4')`
			`"""`
			`if arr.shape[-1] == 1:`
			`height, width, channels = arr.shape[1:]`
			`fourcc = cv2.VideoWriter_fourcc(*'mp4v')`
			`out = cv2.VideoWriter('output.mp4', fourcc, 30.0, (width, height))`
			`for i in range(arr.shape[0]):`
			`frame = arr[i]`
			`frame = np.uint8(frame)`
			`frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR)`
			`out.write(frame)`
Detecting and removing corrupt video files from the dataset 2023-03-25 16:36:58 +00:00			`out.release()`

Adding some new things 2023-04-24 15:42:37 +00:00			`def save_video(images):`
			`"""`
			`Image shape is (T, C, H, W)`
			`Example:(50, 3, 84, 84)`
			`"""`
			`output_file = "output.avi"`
			`fourcc = cv2.VideoWriter_fourcc(*'XVID')`
			`fps = 2`
			`height, width, channels = 84,84,3`
			`out = cv2.VideoWriter(output_file, fourcc, fps, (width, height))`
			`for image in images:`
			`image = np.uint8(image.transpose((1, 2, 0)))`
			`out.write(image)`
			`out.release()`
Detecting and removing corrupt video files from the dataset 2023-03-25 16:36:58 +00:00
			`class CorruptVideos:`
			`def __init__(self, dir_path):`
			`self.dir_path = dir_path`

			`def _is_video_corrupt(self,filepath):`
			`"""`
			`Check if a video file is corrupt.`

			`Args:`
Grouping for actions too 2023-03-27 17:22:47 +00:00			`dir_path (str): Path to the video file.`
Detecting and removing corrupt video files from the dataset 2023-03-25 16:36:58 +00:00
			`Returns:`
			`bool: True if the video is corrupt, False otherwise.`
			`"""`
			`# Open the video file`
			`cap = cv2.VideoCapture(filepath)`
			`if not cap.isOpened():`
			`return True`
			`ret, frame = cap.read()`
			`if not ret:`
			`return True`
			`cap.release()`
			`return False`

			`def _delete_corrupt_video(self, filepath):`
			`os.remove(filepath)`

			`def is_video_corrupt(self, delete=False):`
			`for filename in os.listdir(self.dir_path):`
			`filepath = os.path.join(self.dir_path, filename)`
			`if filepath.endswith(".mp4"):`
			`if self._is_video_corrupt(filepath):`
			`print(f"{filepath} is corrupt.")`
			`if delete:`
			`self._delete_corrupt_video(filepath)`
Add a class to freeze parameters 2023-04-10 18:17:44 +00:00			`print(f"Deleted {filepath}")`


			`def get_parameters(modules: Iterable[nn.Module]):`
			`"""`
			`Given a list of torch modules, returns a list of their parameters.`
			`:param modules: iterable of modules`
			`:returns: a list of parameters`
			`"""`
			`model_parameters = []`
			`for module in modules:`
			`model_parameters += list(module.parameters())`
			`return model_parameters`

			`class FreezeParameters:`
			`def __init__(self, modules: Iterable[nn.Module]):`
			`"""`
			`Context manager to locally freeze gradients.`
			`In some cases with can speed up computation because gradients aren't calculated for these listed modules.`
			`example:`
			```
			`with FreezeParameters([module]):`
			`output_tensor = module(input_tensor)`
			```
			`:param modules: iterable of modules. used to call .parameters() to freeze gradients.`
			`"""`
			`self.modules = modules`
			`self.param_states = [p.requires_grad for p in get_parameters(self.modules)]`

			`def __enter__(self):`

			`for param in get_parameters(self.modules):`
			`param.requires_grad = False`

			`def __exit__(self, exc_type, exc_val, exc_tb):`
			`for i, param in enumerate(get_parameters(self.modules)):`
Adding after some changes 2023-04-22 11:07:22 +00:00			`param.requires_grad = self.param_states[i]`

			`class Logger:`

			`def __init__(self, log_dir, n_logged_samples=10, summary_writer=None):`
			`self._log_dir = log_dir`
			`print('########################')`
			`print('logging outputs to ', log_dir)`
			`print('########################')`
			`self._n_logged_samples = n_logged_samples`
			`self._summ_writer = SummaryWriter(log_dir, flush_secs=1, max_queue=1)`

			`def log_scalar(self, scalar, name, step_):`
			`self._summ_writer.add_scalar('{}'.format(name), scalar, step_)`

			`def log_scalars(self, scalar_dict, step):`
			`for key, value in scalar_dict.items():`
			`print('{} : {}'.format(key, value))`
			`self.log_scalar(value, key, step)`
			`self.dump_scalars_to_pickle(scalar_dict, step)`

			`def log_videos(self, videos, step, max_videos_to_save=1, fps=20, video_title='video'):`

			`# max rollout length`
			`max_videos_to_save = np.min([max_videos_to_save, videos.shape[0]])`
			`max_length = videos[0].shape[0]`
			`for i in range(max_videos_to_save):`
			`if videos[i].shape[0]>max_length:`
			`max_length = videos[i].shape[0]`

			`# pad rollouts to all be same length`
			`for i in range(max_videos_to_save):`
			`if videos[i].shape[0]<max_length:`
			`padding = np.tile([videos[i][-1]], (max_length-videos[i].shape[0],1,1,1))`
			`videos[i] = np.concatenate([videos[i], padding], 0)`

			`clip = mpy.ImageSequenceClip(list(videos[i]), fps=fps)`
			`new_video_title = video_title+'{}_{}'.format(step, i) + '.gif'`
			`filename = os.path.join(self._log_dir, new_video_title)`
			`video.write_gif(filename, fps =fps)`


			`def dump_scalars_to_pickle(self, metrics, step, log_title=None):`
			`log_path = os.path.join(self._log_dir, "scalar_data.pkl" if log_title is None else log_title)`
			`with open(log_path, 'ab') as f:`
			`pickle.dump({'step': step, **dict(metrics)}, f)`

			`def flush(self):`
			`self._summ_writer.flush()`