diff --git a/encoder.py b/encoder.py
index b137a62..be32fd6 100644
--- a/encoder.py
+++ b/encoder.py
@@ -10,7 +10,7 @@ def tie_weights(src, trg):
 
 OUT_DIM = {2: 39, 4: 35, 6: 31}
 
-
+'''
 class PixelEncoder(nn.Module):
     """Convolutional encoder of pixels observations."""
     def __init__(self, obs_shape, feature_dim, num_layers=2, num_filters=32):
@@ -88,7 +88,92 @@ class PixelEncoder(nn.Module):
             L.log_param('train_encoder/conv%s' % (i + 1), self.convs[i], step)
         L.log_param('train_encoder/fc', self.fc, step)
         L.log_param('train_encoder/ln', self.ln, step)
+'''
+class PixelEncoder(nn.Module):
+    """Convolutional encoder of pixels observations."""
+    def __init__(self, obs_shape, feature_dim, num_layers=2, num_filters=32):
+        super().__init__()
+
+        assert len(obs_shape) == 3
+
+        self.feature_dim = feature_dim
+        self.num_layers = num_layers
+
+        self.convs = nn.ModuleList(
+            [nn.Conv2d(obs_shape[0], num_filters, 3, stride=2)]
+        )
+        for i in range(num_layers - 1):
+            self.convs.append(nn.Conv2d(num_filters, num_filters, 3, stride=1))
+
+        out_dim = OUT_DIM[num_layers]
+        self.fc = nn.Linear(num_filters * out_dim * out_dim, self.feature_dim * 2)
+        self.ln = nn.LayerNorm(self.feature_dim * 2)
+
+        self.outputs = dict()
+
+    def reparameterize(self, mu, logstd):
+        std = torch.exp(logstd)
+        eps = torch.randn_like(std)
+        return mu + eps * std
+
+    def forward_conv(self, obs):
+        obs = obs / 255.
+        self.outputs['obs'] = obs
+
+        conv = torch.relu(self.convs[0](obs))
+        self.outputs['conv1'] = conv
+
+        for i in range(1, self.num_layers):
+            conv = torch.relu(self.convs[i](conv))
+            self.outputs['conv%s' % (i + 1)] = conv
+
+        h = conv.view(conv.size(0), -1)
+        return h
+
+    def forward(self, obs, detach=False):
+        h = self.forward_conv(obs)
+
+        if detach:
+            h = h.detach()
+
+        h_fc = self.fc(h)
+        self.outputs['fc'] = h_fc
+
+        h_norm = self.ln(h_fc)
+        self.outputs['ln'] = h_norm
+
+        #out = torch.tanh(h_norm)
+
+        mu, logstd = torch.chunk(h_norm, 2, dim=-1)
+        logstd = torch.tanh(logstd)
+        self.outputs['mu'] = mu
+        self.outputs['logstd'] = logstd
+        self.outputs['std'] = logstd.exp()
+
+        out = self.reparameterize(mu, logstd)
+        self.outputs['tanh'] = out
+        return out
+
+    def copy_conv_weights_from(self, source):
+        """Tie convolutional layers"""
+        # only tie conv layers
+        for i in range(self.num_layers):
+            tie_weights(src=source.convs[i], trg=self.convs[i])
+
+    def log(self, L, step, log_freq):
+        if step % log_freq != 0:
+            return
+
+        for k, v in self.outputs.items():
+            L.log_histogram('train_encoder/%s_hist' % k, v, step)
+            if len(v.shape) > 2:
+                L.log_image('train_encoder/%s_img' % k, v[0], step)
+
+        for i in range(self.num_layers):
+            L.log_param('train_encoder/conv%s' % (i + 1), self.convs[i], step)
+        L.log_param('train_encoder/fc', self.fc, step)
+        L.log_param('train_encoder/ln', self.ln, step)
 
 
 class IdentityEncoder(nn.Module):
     def __init__(self, obs_shape, feature_dim, num_layers, num_filters):
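
The patch above turns the deterministic encoder output (tanh of the LayerNorm'd FC features) into a stochastic one: the FC layer now emits 2 * feature_dim units, forward() splits them into mu and logstd with torch.chunk, and the returned feature is a reparameterized sample mu + eps * std. The sketch below is illustrative only, not part of the patch; it assumes the patched encoder.py is importable and that observations are 84x84 stacked frames (consistent with OUT_DIM, since 84 -> 41 -> 39 for num_layers=2). The batch size, channel count, and feature_dim=50 are assumptions for the example.

# Illustrative usage sketch -- not part of the patch. Assumes the patched
# encoder.py is on the import path; all shapes here are hypothetical.
import torch
from encoder import PixelEncoder

# 9 channels, e.g. 3 stacked RGB frames; 84x84 yields the 39x39 conv map
# that OUT_DIM[2] expects for the default num_layers=2.
encoder = PixelEncoder(obs_shape=(9, 84, 84), feature_dim=50)

# Fake uint8-range pixel batch; forward_conv() divides by 255 internally.
obs = torch.randint(0, 256, (8, 9, 84, 84)).float()

# Two passes over the same input now differ, because forward() samples
# z = mu + eps * std instead of returning tanh(ln(fc(h))).
z1 = encoder(obs)
z2 = encoder(obs)
print(z1.shape)                # torch.Size([8, 50])
print(torch.allclose(z1, z2))  # False: the encoder is stochastic

Squashing logstd with tanh bounds it to (-1, 1), so std = exp(logstd) stays within roughly (0.37, 2.72); this keeps the sampled features from collapsing to a point or blowing up in variance early in training.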