Compare commits
No commits in common. "8b79f49bb928df881d8c3454ee54309d6f84a099" and "ca334452a04a15e16aa365633fb8eb67ca9e04d6" have entirely different histories.
8b79f49bb9
...
ca334452a0
@ -109,7 +109,10 @@ class PixelEncoder(nn.Module):
|
|||||||
out_dim = OUT_DIM[num_layers]
|
out_dim = OUT_DIM[num_layers]
|
||||||
self.fc = nn.Linear(num_filters * out_dim * out_dim, self.feature_dim * 2)
|
self.fc = nn.Linear(num_filters * out_dim * out_dim, self.feature_dim * 2)
|
||||||
self.ln = nn.LayerNorm(self.feature_dim * 2)
|
self.ln = nn.LayerNorm(self.feature_dim * 2)
|
||||||
|
<<<<<<< HEAD
|
||||||
self.combine = nn.Linear(self.feature_dim + 6, self.feature_dim)
|
self.combine = nn.Linear(self.feature_dim + 6, self.feature_dim)
|
||||||
|
=======
|
||||||
|
>>>>>>> origin/tester_1
|
||||||
|
|
||||||
self.outputs = dict()
|
self.outputs = dict()
|
||||||
|
|
||||||
@ -154,7 +157,11 @@ class PixelEncoder(nn.Module):
|
|||||||
|
|
||||||
out = self.reparameterize(mu, logstd)
|
out = self.reparameterize(mu, logstd)
|
||||||
self.outputs['tanh'] = out
|
self.outputs['tanh'] = out
|
||||||
|
<<<<<<< HEAD
|
||||||
return out, mu, logstd
|
return out, mu, logstd
|
||||||
|
=======
|
||||||
|
return out
|
||||||
|
>>>>>>> origin/tester_1
|
||||||
|
|
||||||
def copy_conv_weights_from(self, source):
|
def copy_conv_weights_from(self, source):
|
||||||
"""Tie convolutional layers"""
|
"""Tie convolutional layers"""
|
||||||
|
@ -1,11 +1,10 @@
|
|||||||
import os
|
import os
|
||||||
import numpy as np
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import seaborn as sns
|
import seaborn as sns
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
|
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
|
||||||
|
|
||||||
"""
|
|
||||||
def tabulate_events(dpath):
|
def tabulate_events(dpath):
|
||||||
files = os.listdir(dpath)[0]
|
files = os.listdir(dpath)[0]
|
||||||
summary_iterators = [EventAccumulator(os.path.join(dpath, files)).Reload()]
|
summary_iterators = [EventAccumulator(os.path.join(dpath, files)).Reload()]
|
||||||
@ -44,43 +43,7 @@ for tag, values in events.items():
|
|||||||
|
|
||||||
df = pd.DataFrame(data)
|
df = pd.DataFrame(data)
|
||||||
print(df.head())
|
print(df.head())
|
||||||
exit()
|
|
||||||
|
|
||||||
plt.figure(figsize=(10,6))
|
plt.figure(figsize=(10,6))
|
||||||
sns.lineplot(data=df, x='step', y='value', hue='tag', ci='sd')
|
sns.lineplot(data=df, x='step', y='value', hue='tag', ci='sd')
|
||||||
plt.show()
|
plt.show()
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
from tensorboard.backend.event_processing import event_accumulator
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def data_from_tb(files):
|
|
||||||
all_steps, all_rewards = [], []
|
|
||||||
for file in files:
|
|
||||||
ea = event_accumulator.EventAccumulator(file, size_guidance={'scalars': 0})
|
|
||||||
ea.Reload()
|
|
||||||
|
|
||||||
episode_rewards = ea.Scalars('train/episode_reward')
|
|
||||||
steps = [event.step for event in episode_rewards][:990000]
|
|
||||||
rewards = [event.value for event in episode_rewards][:990000]
|
|
||||||
all_steps.append(steps)
|
|
||||||
all_rewards.append(rewards)
|
|
||||||
return all_steps, all_rewards
|
|
||||||
|
|
||||||
|
|
||||||
files = ['/home/vedant/pytorch_sac_ae/log/runs/tb_21_05_2023-13_19_36/events.out.tfevents.1684667976.cpswkstn6-nvidia4090.1749060.0',
|
|
||||||
'/home/vedant/pytorch_sac_ae/log/runs/tb_22_05_2023-09_56_30/events.out.tfevents.1684742190.cpswkstn6-nvidia4090.1976229.0']
|
|
||||||
|
|
||||||
all_steps, all_rewards = data_from_tb(files)
|
|
||||||
mean_rewards = np.mean(all_rewards, axis=0)
|
|
||||||
std_rewards = np.std(all_rewards, axis=0)
|
|
||||||
mean_steps = np.mean(all_steps, axis=0)
|
|
||||||
|
|
||||||
df = pd.DataFrame({'Steps': mean_steps,'Rewards': mean_rewards,'Standard Deviation': std_rewards})
|
|
||||||
|
|
||||||
sns.relplot(x='Steps', y='Rewards', kind='line', data=df, ci="sd")
|
|
||||||
plt.fill_between(df['Steps'], df['Rewards'] - df['Standard Deviation'], df['Rewards'] + df['Standard Deviation'], color='b', alpha=.1)
|
|
||||||
plt.title("Mean Rewards vs Steps with Standard Deviation")
|
|
||||||
plt.show()
|
|
@ -417,13 +417,14 @@ class SacAeAgent(object):
|
|||||||
h_dist_pred = torch.distributions.Normal(mean, std)
|
h_dist_pred = torch.distributions.Normal(mean, std)
|
||||||
enc_loss = torch.distributions.kl.kl_divergence(h_dist_enc, h_dist_pred).mean() * 1e-2
|
enc_loss = torch.distributions.kl.kl_divergence(h_dist_enc, h_dist_pred).mean() * 1e-2
|
||||||
|
|
||||||
|
"""
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
z_pos, _ , _ = self.critic_target.encoder(next_obs_list[-1])
|
z_pos, _ , _ = self.critic_target.encoder(next_obs_list[-1])
|
||||||
z_out = self.critic_target.encoder.combine(torch.concat((z_pos, action), dim=-1))
|
z_out = self.critic_target.encoder.combine(torch.concat((z_pos, action), dim=-1))
|
||||||
logits = self.lb_loss.compute_logits(h, z_out)
|
logits = self.lb_loss.compute_logits(h, z_out)
|
||||||
labels = torch.arange(logits.shape[0]).long().to(self.device)
|
labels = torch.arange(logits.shape[0]).long().to(self.device)
|
||||||
lb_loss = nn.CrossEntropyLoss()(logits, labels) * 1e-2
|
lb_loss = nn.CrossEntropyLoss()(logits, labels) * 1e-2
|
||||||
|
"""
|
||||||
#with torch.no_grad():
|
#with torch.no_grad():
|
||||||
# z_pos, _ , _ = self.critic.encoder(next_obs_list[-1])
|
# z_pos, _ , _ = self.critic.encoder(next_obs_list[-1])
|
||||||
#ub_loss = club_loss(state_enc["sample"], mean, state_enc["logvar"], h) * 1e-1
|
#ub_loss = club_loss(state_enc["sample"], mean, state_enc["logvar"], h) * 1e-1
|
||||||
@ -436,7 +437,7 @@ class SacAeAgent(object):
|
|||||||
|
|
||||||
ub_loss = torch.tensor(0.0)
|
ub_loss = torch.tensor(0.0)
|
||||||
#enc_loss = torch.tensor(0.0)
|
#enc_loss = torch.tensor(0.0)
|
||||||
#lb_loss = torch.tensor(0.0)
|
lb_loss = torch.tensor(0.0)
|
||||||
#rec_loss = torch.tensor(0.0)
|
#rec_loss = torch.tensor(0.0)
|
||||||
loss = rec_loss + enc_loss + lb_loss + ub_loss
|
loss = rec_loss + enc_loss + lb_loss + ub_loss
|
||||||
self.encoder_optimizer.zero_grad()
|
self.encoder_optimizer.zero_grad()
|
||||||
|
3
train.py
3
train.py
@ -28,7 +28,10 @@ def parse_args():
|
|||||||
parser.add_argument('--frame_stack', default=3, type=int)
|
parser.add_argument('--frame_stack', default=3, type=int)
|
||||||
parser.add_argument('--img_source', default=None, type=str, choices=['color', 'noise', 'images', 'video', 'none'])
|
parser.add_argument('--img_source', default=None, type=str, choices=['color', 'noise', 'images', 'video', 'none'])
|
||||||
parser.add_argument('--resource_files', type=str)
|
parser.add_argument('--resource_files', type=str)
|
||||||
|
<<<<<<< HEAD
|
||||||
parser.add_argument('--resource_files_test', type=str)
|
parser.add_argument('--resource_files_test', type=str)
|
||||||
|
=======
|
||||||
|
>>>>>>> origin/tester_1
|
||||||
parser.add_argument('--total_frames', default=10000, type=int)
|
parser.add_argument('--total_frames', default=10000, type=int)
|
||||||
# replay buffer
|
# replay buffer
|
||||||
parser.add_argument('--replay_buffer_capacity', default=100000, type=int)
|
parser.add_argument('--replay_buffer_capacity', default=100000, type=int)
|
||||||
|
Loading…
Reference in New Issue
Block a user