Compare commits

...

2 Commits

Author SHA1 Message Date
8b79f49bb9 Add graphs 2023-05-25 17:53:46 +02:00
82e8a23918 Adding files 2023-05-25 17:51:31 +02:00
4 changed files with 42 additions and 16 deletions

View File

@ -109,10 +109,7 @@ class PixelEncoder(nn.Module):
out_dim = OUT_DIM[num_layers] out_dim = OUT_DIM[num_layers]
self.fc = nn.Linear(num_filters * out_dim * out_dim, self.feature_dim * 2) self.fc = nn.Linear(num_filters * out_dim * out_dim, self.feature_dim * 2)
self.ln = nn.LayerNorm(self.feature_dim * 2) self.ln = nn.LayerNorm(self.feature_dim * 2)
<<<<<<< HEAD
self.combine = nn.Linear(self.feature_dim + 6, self.feature_dim) self.combine = nn.Linear(self.feature_dim + 6, self.feature_dim)
=======
>>>>>>> origin/tester_1
self.outputs = dict() self.outputs = dict()
@ -157,12 +154,8 @@ class PixelEncoder(nn.Module):
out = self.reparameterize(mu, logstd) out = self.reparameterize(mu, logstd)
self.outputs['tanh'] = out self.outputs['tanh'] = out
<<<<<<< HEAD
return out, mu, logstd return out, mu, logstd
=======
return out
>>>>>>> origin/tester_1
def copy_conv_weights_from(self, source): def copy_conv_weights_from(self, source):
"""Tie convolutional layers""" """Tie convolutional layers"""
# only tie conv layers # only tie conv layers

View File

@ -1,10 +1,11 @@
import os import os
import numpy as np
import pandas as pd import pandas as pd
import seaborn as sns import seaborn as sns
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
"""
def tabulate_events(dpath): def tabulate_events(dpath):
files = os.listdir(dpath)[0] files = os.listdir(dpath)[0]
summary_iterators = [EventAccumulator(os.path.join(dpath, files)).Reload()] summary_iterators = [EventAccumulator(os.path.join(dpath, files)).Reload()]
@ -43,7 +44,43 @@ for tag, values in events.items():
df = pd.DataFrame(data) df = pd.DataFrame(data)
print(df.head()) print(df.head())
exit()
plt.figure(figsize=(10,6)) plt.figure(figsize=(10,6))
sns.lineplot(data=df, x='step', y='value', hue='tag', ci='sd') sns.lineplot(data=df, x='step', y='value', hue='tag', ci='sd')
plt.show()
"""
from tensorboard.backend.event_processing import event_accumulator
def data_from_tb(files, tag='train/episode_reward', max_events=990000):
    """Load one scalar series per TensorBoard event file.

    Args:
        files: Iterable of paths, each handed to ``EventAccumulator``.
        tag: Scalar tag read from every file. The default is the tag this
            script previously hard-coded.
        max_events: Keep at most this many leading events per file. Must be
            a non-negative int, or ``None`` to keep every event. The default
            is the cap this script previously hard-coded.

    Returns:
        A ``(all_steps, all_rewards)`` tuple of two lists. Each contains one
        inner list per entry of ``files``, in the same order, holding the
        ``step`` and ``value`` attributes of the retained events.
    """
    from itertools import islice

    all_steps, all_rewards = [], []
    for path in files:
        # NOTE(review): a size_guidance of 0 is documented by TensorBoard as
        # "keep all events" for that category - confirm for the installed
        # version.
        ea = event_accumulator.EventAccumulator(path, size_guidance={'scalars': 0})
        ea.Reload()
        # Truncate once, before extracting fields, instead of building two
        # full-length lists and slicing each of them afterwards.
        events = list(islice(ea.Scalars(tag), max_events))
        all_steps.append([event.step for event in events])
        all_rewards.append([event.value for event in events])
    return all_steps, all_rewards
# Event files of the training runs that are aggregated into a single curve.
files = [
    '/home/vedant/pytorch_sac_ae/log/runs/tb_21_05_2023-13_19_36/events.out.tfevents.1684667976.cpswkstn6-nvidia4090.1749060.0',
    '/home/vedant/pytorch_sac_ae/log/runs/tb_22_05_2023-09_56_30/events.out.tfevents.1684742190.cpswkstn6-nvidia4090.1976229.0',
]
all_steps, all_rewards = data_from_tb(files)

# np.mean / np.std with axis=0 need a rectangular array, so runs that logged
# a different number of events are cut down to the shortest run first.
common_len = min(len(run_rewards) for run_rewards in all_rewards)
all_steps = [run_steps[:common_len] for run_steps in all_steps]
all_rewards = [run_rewards[:common_len] for run_rewards in all_rewards]

# Aggregate across runs (axis 0), position by position.
mean_rewards = np.mean(all_rewards, axis=0)
std_rewards = np.std(all_rewards, axis=0)
mean_steps = np.mean(all_steps, axis=0)

df = pd.DataFrame({
    'Steps': mean_steps,
    'Rewards': mean_rewards,
    'Standard Deviation': std_rewards,
})

# NOTE(review): df holds already-aggregated values, so the spread band is
# drawn explicitly by fill_between below; the ci="sd" argument presumably has
# nothing left to estimate here, and newer seaborn releases replace `ci` with
# `errorbar` - confirm against the installed version before changing it.
sns.relplot(x='Steps', y='Rewards', kind='line', data=df, ci="sd")
plt.fill_between(
    df['Steps'],
    df['Rewards'] - df['Standard Deviation'],
    df['Rewards'] + df['Standard Deviation'],
    color='b',
    alpha=.1,
)
plt.title("Mean Rewards vs Steps with Standard Deviation")
plt.show() plt.show()

View File

@ -416,15 +416,14 @@ class SacAeAgent(object):
h_dist_enc = torch.distributions.Normal(h_mu, h_logvar.exp()) h_dist_enc = torch.distributions.Normal(h_mu, h_logvar.exp())
h_dist_pred = torch.distributions.Normal(mean, std) h_dist_pred = torch.distributions.Normal(mean, std)
enc_loss = torch.distributions.kl.kl_divergence(h_dist_enc, h_dist_pred).mean() * 1e-2 enc_loss = torch.distributions.kl.kl_divergence(h_dist_enc, h_dist_pred).mean() * 1e-2
"""
with torch.no_grad(): with torch.no_grad():
z_pos, _ , _ = self.critic_target.encoder(next_obs_list[-1]) z_pos, _ , _ = self.critic_target.encoder(next_obs_list[-1])
z_out = self.critic_target.encoder.combine(torch.concat((z_pos, action), dim=-1)) z_out = self.critic_target.encoder.combine(torch.concat((z_pos, action), dim=-1))
logits = self.lb_loss.compute_logits(h, z_out) logits = self.lb_loss.compute_logits(h, z_out)
labels = torch.arange(logits.shape[0]).long().to(self.device) labels = torch.arange(logits.shape[0]).long().to(self.device)
lb_loss = nn.CrossEntropyLoss()(logits, labels) * 1e-2 lb_loss = nn.CrossEntropyLoss()(logits, labels) * 1e-2
"""
#with torch.no_grad(): #with torch.no_grad():
# z_pos, _ , _ = self.critic.encoder(next_obs_list[-1]) # z_pos, _ , _ = self.critic.encoder(next_obs_list[-1])
#ub_loss = club_loss(state_enc["sample"], mean, state_enc["logvar"], h) * 1e-1 #ub_loss = club_loss(state_enc["sample"], mean, state_enc["logvar"], h) * 1e-1
@ -437,7 +436,7 @@ class SacAeAgent(object):
ub_loss = torch.tensor(0.0) ub_loss = torch.tensor(0.0)
#enc_loss = torch.tensor(0.0) #enc_loss = torch.tensor(0.0)
lb_loss = torch.tensor(0.0) #lb_loss = torch.tensor(0.0)
#rec_loss = torch.tensor(0.0) #rec_loss = torch.tensor(0.0)
loss = rec_loss + enc_loss + lb_loss + ub_loss loss = rec_loss + enc_loss + lb_loss + ub_loss
self.encoder_optimizer.zero_grad() self.encoder_optimizer.zero_grad()

View File

@ -28,10 +28,7 @@ def parse_args():
parser.add_argument('--frame_stack', default=3, type=int) parser.add_argument('--frame_stack', default=3, type=int)
parser.add_argument('--img_source', default=None, type=str, choices=['color', 'noise', 'images', 'video', 'none']) parser.add_argument('--img_source', default=None, type=str, choices=['color', 'noise', 'images', 'video', 'none'])
parser.add_argument('--resource_files', type=str) parser.add_argument('--resource_files', type=str)
<<<<<<< HEAD
parser.add_argument('--resource_files_test', type=str) parser.add_argument('--resource_files_test', type=str)
=======
>>>>>>> origin/tester_1
parser.add_argument('--total_frames', default=10000, type=int) parser.add_argument('--total_frames', default=10000, type=int)
# replay buffer # replay buffer
parser.add_argument('--replay_buffer_capacity', default=100000, type=int) parser.add_argument('--replay_buffer_capacity', default=100000, type=int)