Add environment wrappers (ActionRepeat, NormalizeActions) and random index sampling for trajectory selection

2023-04-13 18:41:15 +02:00 · 2023-04-13 18:41:15 +02:00 · 9aa07fed6a
commit 9aa07fed6a
parent 233ca77aa4
1 changed files with 55 additions and 8 deletions
--- a/DPI/utils.py
+++ b/DPI/utils.py
@ -1,9 +1,3 @@
-# Copyright (c) Facebook, Inc. and its affiliates.
-# All rights reserved.
-
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-
 import os
 import random
 import numpy as np
@ -108,6 +102,49 @@ class FrameStack(gym.Wrapper):
        return np.concatenate(list(self._frames), axis=0)


+class ActionRepeat:  # Env wrapper: one step() call applies the same action up to `amount` times.
+    def __init__(self, env, amount):
+        self._env = env  # wrapped environment; step() and unknown attribute lookups go to it
+        self._amount = amount  # maximum number of inner env steps per step() call
+
+    def __getattr__(self, name):  # only reached for names not found on the wrapper itself
+        return getattr(self._env, name)
+
+    def step(self, action):  # returns (last obs, summed reward, last done, last info)
+        done = False
+        total_reward = 0
+        current_step = 0
+        while current_step < self._amount and not done:  # stop early once the env reports done
+            obs, reward, done, info = self._env.step(action)
+            total_reward += reward
+            current_step += 1
+        return obs, total_reward, done, info  # NOTE(review): obs/info are unbound if amount <= 0 (loop never runs)
+
+
+class NormalizeActions:  # Env wrapper: action dims with finite bounds are exposed on a [-1, 1] scale.
+    def __init__(self, env):
+        self._env = env  # wrapped environment
+        self._mask = np.logical_and(  # True where both the low and the high bound are finite
+            np.isfinite(env.action_space.low),
+            np.isfinite(env.action_space.high))
+        self._low = np.where(self._mask, env.action_space.low, -1)  # dims without finite bounds fall back to -1
+        self._high = np.where(self._mask, env.action_space.high, 1)  # dims without finite bounds fall back to 1
+
+    def __getattr__(self, name):  # only reached for names not found on the wrapper itself
+        return getattr(self._env, name)
+
+    @property
+    def action_space(self):  # Box that is [-1, 1] on masked dims and uses the stored bounds elsewhere
+        low = np.where(self._mask, -np.ones_like(self._low), self._low)
+        high = np.where(self._mask, np.ones_like(self._low), self._high)
+        return gym.spaces.Box(low, high, dtype=np.float32)
+
+    def step(self, action):  # rescale the action on masked dims, then step the wrapped env
+        original = (action + 1) / 2 * (self._high - self._low) + self._low  # affine map from [-1, 1] to [low, high]
+        original = np.where(self._mask, original, action)  # unmasked dims pass through unchanged
+        return self._env.step(original)
+
+
 class ReplayBuffer:
    def __init__(self, size, obs_shape, action_size, seq_len, batch_size, args):
        self.size = size
@ -164,11 +201,11 @@ class ReplayBuffer:
        non_zero_indices = np.nonzero(buffer.episode_count)[0]
        variable = variable[non_zero_indices]
        if obs:
-            variable = variable.reshape(self.args.batch_size, self.args.episode_length,
+            variable = variable.reshape(-1, self.args.episode_length,
                                        self.args.frame_stack*self.args.channels,
                                        self.args.image_size,self.args.image_size).transpose(1, 0, 2, 3, 4)
        else:
-            variable = variable.reshape(self.args.batch_size, self.args.episode_length, -1).transpose(1, 0, 2)
+            variable = variable.reshape(variable.shape[0]//self.args.episode_length, self.args.episode_length, -1).transpose(1, 0, 2)
        return variable

    def transform_grouped_steps(self, variable):
@ -177,6 +214,16 @@ class ReplayBuffer:
                                    self.args.image_size,self.args.image_size)
        return variable
    
+    def sample_random_idx(self, buffer_length):  # pick args.batch_size distinct indices from range(buffer_length)
+        random_indices = random.sample(range(0, buffer_length), self.args.batch_size)  # sampling without replacement; raises ValueError if buffer_length < batch_size
+        return random_indices
+
+    def group_and_sample_random_batch(self, buffer, variable_name, device, random_indices, is_obs=True, offset=0):  # group a buffer variable, trim axis 0, move it to `device`, then select random_indices on axis 1
+        if offset == 0:  # no offset: keep the first episode_length - 1 entries along axis 0
+            variable_tensor = torch.tensor(self.group_steps(buffer,variable_name, is_obs)).float()[:self.args.episode_length-1].to(device)
+        else:  # otherwise drop the first `offset` entries along axis 0
+            variable_tensor = torch.tensor(self.group_steps(buffer,variable_name, is_obs)).float()[offset:].to(device)
+        return variable_tensor[:,random_indices,:,:,:] if is_obs else variable_tensor[:,random_indices,:]  # obs is indexed as 5-D, everything else as 3-D; presumably axis 0 is time and axis 1 is episode - TODO confirm against group_steps

 def make_env(args):
    # For making ground plane transparent, change rgba to (0, 0, 0, 0) in local_dm_control_suite/{domain_name}.xml,