Compare commits

..

No commits in common. "8b79f49bb928df881d8c3454ee54309d6f84a099" and "ca334452a04a15e16aa365633fb8eb67ca9e04d6" have entirely different histories.

4 changed files with 16 additions and 42 deletions

View File

@@ -109,7 +109,10 @@ class PixelEncoder(nn.Module):
out_dim = OUT_DIM[num_layers] out_dim = OUT_DIM[num_layers]
self.fc = nn.Linear(num_filters * out_dim * out_dim, self.feature_dim * 2) self.fc = nn.Linear(num_filters * out_dim * out_dim, self.feature_dim * 2)
self.ln = nn.LayerNorm(self.feature_dim * 2) self.ln = nn.LayerNorm(self.feature_dim * 2)
<<<<<<< HEAD
self.combine = nn.Linear(self.feature_dim + 6, self.feature_dim) self.combine = nn.Linear(self.feature_dim + 6, self.feature_dim)
=======
>>>>>>> origin/tester_1
self.outputs = dict() self.outputs = dict()
@@ -154,8 +157,12 @@ class PixelEncoder(nn.Module):
out = self.reparameterize(mu, logstd) out = self.reparameterize(mu, logstd)
self.outputs['tanh'] = out self.outputs['tanh'] = out
<<<<<<< HEAD
return out, mu, logstd return out, mu, logstd
=======
return out
>>>>>>> origin/tester_1
def copy_conv_weights_from(self, source): def copy_conv_weights_from(self, source):
"""Tie convolutional layers""" """Tie convolutional layers"""
# only tie conv layers # only tie conv layers

View File

@@ -1,11 +1,10 @@
import os import os
import numpy as np
import pandas as pd import pandas as pd
import seaborn as sns import seaborn as sns
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
"""
def tabulate_events(dpath): def tabulate_events(dpath):
files = os.listdir(dpath)[0] files = os.listdir(dpath)[0]
summary_iterators = [EventAccumulator(os.path.join(dpath, files)).Reload()] summary_iterators = [EventAccumulator(os.path.join(dpath, files)).Reload()]
@@ -44,43 +43,7 @@ for tag, values in events.items():
df = pd.DataFrame(data) df = pd.DataFrame(data)
print(df.head()) print(df.head())
exit()
plt.figure(figsize=(10,6)) plt.figure(figsize=(10,6))
sns.lineplot(data=df, x='step', y='value', hue='tag', ci='sd') sns.lineplot(data=df, x='step', y='value', hue='tag', ci='sd')
plt.show()
"""
from tensorboard.backend.event_processing import event_accumulator
def data_from_tb(files, tag='train/episode_reward', max_events=990000):
    """Load one scalar series from each TensorBoard event file.

    Args:
        files: Iterable of event-file paths, each passed to
            ``event_accumulator.EventAccumulator``.
        tag: Scalar tag to read. Defaults to the tag that was previously
            hard-coded, so existing callers see no change.
        max_events: Keep at most this many leading events per file
            (``None`` keeps all of them). Defaults to the previous
            hard-coded cap.

    Returns:
        A tuple ``(all_steps, all_rewards)``. Each is a list with one inner
        list per input file, holding the ``step`` and ``value`` attributes
        of the kept events respectively.
    """
    all_steps, all_rewards = [], []
    for path in files:
        # NOTE(review): size_guidance 0 for scalars is documented by
        # TensorBoard as "keep every event" -- confirm for the installed version.
        ea = event_accumulator.EventAccumulator(path, size_guidance={'scalars': 0})
        ea.Reload()
        # Truncate once up front so both per-field lists are built from the
        # same, already-capped sequence of events.
        events = list(ea.Scalars(tag))[:max_events]
        all_steps.append([event.step for event in events])
        all_rewards.append([event.value for event in events])
    return all_steps, all_rewards
# Event files of the runs to aggregate (one file per training run).
files = ['/home/vedant/pytorch_sac_ae/log/runs/tb_21_05_2023-13_19_36/events.out.tfevents.1684667976.cpswkstn6-nvidia4090.1749060.0',
         '/home/vedant/pytorch_sac_ae/log/runs/tb_22_05_2023-09_56_30/events.out.tfevents.1684742190.cpswkstn6-nvidia4090.1976229.0']
all_steps, all_rewards = data_from_tb(files)

# Runs may have logged different numbers of episodes. Reducing over axis 0
# needs a rectangular array, so trim every run to the shortest one first.
common_len = min(len(run) for run in all_rewards)
all_steps = [run[:common_len] for run in all_steps]
all_rewards = [run[:common_len] for run in all_rewards]

# Per-index statistics across runs (axis 0 indexes the run).
mean_rewards = np.mean(all_rewards, axis=0)
std_rewards = np.std(all_rewards, axis=0)
mean_steps = np.mean(all_steps, axis=0)

df = pd.DataFrame({'Steps': mean_steps,'Rewards': mean_rewards,'Standard Deviation': std_rewards})

# Line of the mean reward, with a shaded band of +/- one standard deviation.
sns.relplot(x='Steps', y='Rewards', kind='line', data=df, ci="sd")
plt.fill_between(df['Steps'], df['Rewards'] - df['Standard Deviation'], df['Rewards'] + df['Standard Deviation'], color='b', alpha=.1)
plt.title("Mean Rewards vs Steps with Standard Deviation")
plt.show() plt.show()

View File

@@ -416,14 +416,15 @@ class SacAeAgent(object):
h_dist_enc = torch.distributions.Normal(h_mu, h_logvar.exp()) h_dist_enc = torch.distributions.Normal(h_mu, h_logvar.exp())
h_dist_pred = torch.distributions.Normal(mean, std) h_dist_pred = torch.distributions.Normal(mean, std)
enc_loss = torch.distributions.kl.kl_divergence(h_dist_enc, h_dist_pred).mean() * 1e-2 enc_loss = torch.distributions.kl.kl_divergence(h_dist_enc, h_dist_pred).mean() * 1e-2
"""
with torch.no_grad(): with torch.no_grad():
z_pos, _ , _ = self.critic_target.encoder(next_obs_list[-1]) z_pos, _ , _ = self.critic_target.encoder(next_obs_list[-1])
z_out = self.critic_target.encoder.combine(torch.concat((z_pos, action), dim=-1)) z_out = self.critic_target.encoder.combine(torch.concat((z_pos, action), dim=-1))
logits = self.lb_loss.compute_logits(h, z_out) logits = self.lb_loss.compute_logits(h, z_out)
labels = torch.arange(logits.shape[0]).long().to(self.device) labels = torch.arange(logits.shape[0]).long().to(self.device)
lb_loss = nn.CrossEntropyLoss()(logits, labels) * 1e-2 lb_loss = nn.CrossEntropyLoss()(logits, labels) * 1e-2
"""
#with torch.no_grad(): #with torch.no_grad():
# z_pos, _ , _ = self.critic.encoder(next_obs_list[-1]) # z_pos, _ , _ = self.critic.encoder(next_obs_list[-1])
#ub_loss = club_loss(state_enc["sample"], mean, state_enc["logvar"], h) * 1e-1 #ub_loss = club_loss(state_enc["sample"], mean, state_enc["logvar"], h) * 1e-1
@@ -436,7 +437,7 @@ class SacAeAgent(object):
ub_loss = torch.tensor(0.0) ub_loss = torch.tensor(0.0)
#enc_loss = torch.tensor(0.0) #enc_loss = torch.tensor(0.0)
#lb_loss = torch.tensor(0.0) lb_loss = torch.tensor(0.0)
#rec_loss = torch.tensor(0.0) #rec_loss = torch.tensor(0.0)
loss = rec_loss + enc_loss + lb_loss + ub_loss loss = rec_loss + enc_loss + lb_loss + ub_loss
self.encoder_optimizer.zero_grad() self.encoder_optimizer.zero_grad()

View File

@@ -28,7 +28,10 @@ def parse_args():
parser.add_argument('--frame_stack', default=3, type=int) parser.add_argument('--frame_stack', default=3, type=int)
parser.add_argument('--img_source', default=None, type=str, choices=['color', 'noise', 'images', 'video', 'none']) parser.add_argument('--img_source', default=None, type=str, choices=['color', 'noise', 'images', 'video', 'none'])
parser.add_argument('--resource_files', type=str) parser.add_argument('--resource_files', type=str)
<<<<<<< HEAD
parser.add_argument('--resource_files_test', type=str) parser.add_argument('--resource_files_test', type=str)
=======
>>>>>>> origin/tester_1
parser.add_argument('--total_frames', default=10000, type=int) parser.add_argument('--total_frames', default=10000, type=int)
# replay buffer # replay buffer
parser.add_argument('--replay_buffer_capacity', default=100000, type=int) parser.add_argument('--replay_buffer_capacity', default=100000, type=int)