# tia/Dreamer/graph_plot.py

import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker


def binning(xs, ys, bins, reducer):
    """Reduce ``ys`` over fixed-width windows of ``xs``.

    Bin edges run from ``xs.min()`` to ``xs.max()`` in increments of ``bins``.
    The first bin holds the samples with ``xs <= edges[0]``; every later bin
    ``i`` holds the samples with ``edges[i - 1] < xs <= edges[i]``. Samples
    beyond the last edge are not part of any bin.

    The number of samples at or below each edge is used directly as a slice
    index into ``ys``, so samples land in the right bin only when ``xs`` is
    sorted in ascending order.

    Args:
        xs: Array of x positions (e.g. steps).
        ys: Array of values aligned with ``xs``.
        bins: Width of each bin, in units of ``xs``.
        reducer: Callable applied to the slice of ``ys`` in each bin.

    Returns:
        A ``(edges, reduced)`` pair of arrays of equal length.
    """
    edges = np.arange(xs.min(), xs.max() + 1e-10, bins)

    # Count of samples at or below each boundary; -inf opens the first bin.
    counts = [(xs <= bound).sum() for bound in (-np.inf, *edges)]

    # Consecutive counts delimit the slice of ``ys`` belonging to each bin.
    reduced = [reducer(ys[lo:hi]) for lo, hi in zip(counts, counts[1:])]
    return edges, np.array(reduced)


def plot_data(parent_dir, tag_filter="test/return", xaxis='step', value="AverageEpRet", condition="Condition1", smooth=1, bins=30000, xticks=5, yticks=5):
    """Plot a logged metric, binned over steps and aggregated across runs.

    Every immediate subdirectory of ``parent_dir`` is scanned for ``*.jsonl``
    files. For each file that has a ``tag_filter`` column, the non-null
    ``step`` / ``tag_filter`` pairs are averaged into fixed-width step bins
    via ``binning``. The binned curves of all files are then drawn as one
    seaborn line plot with a standard-deviation band (``errorbar='sd'``),
    and the figure is shown with ``plt.show()``.

    Args:
        parent_dir: Directory whose subdirectories contain the ``.jsonl`` logs.
        tag_filter: Name of the logged column to plot on the y axis.
        xaxis: Column of the binned data used for the x axis. The binned
            data only has the columns ``'step'`` and ``tag_filter``.
        value: Unused; kept for backward compatibility.
        condition: Unused; kept for backward compatibility.
        smooth: Unused; kept for backward compatibility.
        bins: Width of each bin, in steps.
        xticks: ``nbins`` passed to the x-axis ``MaxNLocator``.
        yticks: ``nbins`` passed to the y-axis ``MaxNLocator``.

    Raises:
        ValueError: If no file under ``parent_dir`` provides any
            ``tag_filter`` data.
    """
    runs = []

    for subfolder in os.listdir(parent_dir):
        json_dir = os.path.join(parent_dir, subfolder)
        if not os.path.isdir(json_dir):
            continue
        for json_file in os.listdir(json_dir):
            if not json_file.endswith('.jsonl'):
                continue
            df = pd.read_json(os.path.join(json_dir, json_file), lines=True)

            if tag_filter not in df.columns:
                continue

            # binning() relies on ascending steps and cannot handle empty input.
            df = df[['step', tag_filter]].dropna().sort_values(by='step')
            if df.empty:
                continue

            xs, ys = binning(df['step'].to_numpy(), df[tag_filter].to_numpy(), bins, np.nanmean)
            runs.append(pd.DataFrame({'step': xs, tag_filter: ys}))

    if not runs:
        raise ValueError(
            f"No '{tag_filter}' data found in any .jsonl file under {parent_dir!r}"
        )

    combined_df = pd.concat(runs, ignore_index=True)

    sns.set(style="white", font_scale=1.5)
    ax = sns.lineplot(data=combined_df, x=xaxis, y=tag_filter, errorbar='sd')

    ax.ticklabel_format(axis="x", scilimits=(5, 5))
    steps = [1, 2, 2.5, 5, 10]
    ax.xaxis.set_major_locator(ticker.MaxNLocator(xticks, steps=steps))
    ax.yaxis.set_major_locator(ticker.MaxNLocator(yticks, steps=steps))

    # Span the x axis over every run, not just the last file that was read.
    x_values = combined_df[xaxis]
    ax.set_xlim(x_values.min(), x_values.max())

    plt.tight_layout(pad=0.5)
    plt.show()

# Script driver: plot the default metric of plot_data ("test/return") for the
# hard-coded log directory. This is not guarded by `if __name__ == "__main__":`,
# so it also runs whenever the module is imported.
plot_data('/media/vedant/cpsDataStorageWK/Vedant/tia_logs/dmc_cheetah_run_driving/tia/')


# Terminate the interpreter here. Everything below this line (the definition
# of plot_vanilla and its call) is therefore never executed.
exit()


def plot_vanilla(parent_dir, tag_filter="train/return", smoothing=0.99):
    """Plot the mean and standard deviation of a smoothed metric across runs.

    Every immediate subdirectory of ``parent_dir`` is scanned for ``*.jsonl``
    files. For each file that has a ``tag_filter`` column, an exponential
    moving average (EMA) of the non-null values is computed in step order.
    The EMAs of all files are grouped by ``step``; the per-step mean is drawn
    as a line and mean +/- standard deviation as a shaded band. The figure is
    shown with ``plt.show()``.

    Args:
        parent_dir: Directory whose subdirectories contain the ``.jsonl`` logs.
        tag_filter: Name of the logged column to smooth and plot.
        smoothing: Passed unchanged as ``alpha`` to ``Series.ewm`` with
            ``adjust=False``, i.e. ``ema_t = (1 - alpha) * ema_{t-1} +
            alpha * x_t``. Larger values therefore mean *less* smoothing.

    Raises:
        ValueError: If no file under ``parent_dir`` provides any
            ``tag_filter`` data.
    """
    emas = []

    for subfolder in os.listdir(parent_dir):
        json_dir = os.path.join(parent_dir, subfolder)
        if not os.path.isdir(json_dir):
            continue
        for json_file in os.listdir(json_dir):
            if not json_file.endswith('.jsonl'):
                continue
            df = pd.read_json(os.path.join(json_dir, json_file), lines=True)

            if tag_filter not in df.columns:
                continue

            # Drop rows where the metric was not logged (as plot_data does):
            # with the default ignore_na=False, NaN rows would still decay the
            # EMA weights and add steps that carry no real observation.
            df = df[['step', tag_filter]].dropna().sort_values(by='step')
            if df.empty:
                continue

            df['EMA'] = df[tag_filter].ewm(alpha=smoothing, adjust=False).mean()
            emas.append(df)

    if not emas:
        raise ValueError(
            f"No '{tag_filter}' data found in any .jsonl file under {parent_dir!r}"
        )

    # Aggregate the per-run EMAs step by step.
    all_emas = pd.concat(emas).groupby('step')['EMA']
    mean_emas = all_emas.mean()
    std_emas = all_emas.std()

    sns.set_style("whitegrid", {'axes.grid' : True, 'axes.edgecolor':'black'})
    plt.figure()
    ax = plt.gca()

    # The line carries a label so that the legend below has an entry to show.
    plt.plot(mean_emas.index, mean_emas, color='blue', label=tag_filter)
    plt.fill_between(std_emas.index, (mean_emas-std_emas), (mean_emas+std_emas), color='blue', alpha=.1)

    # NOTE(review): the x values are the raw `step` keys; nothing here rescales
    # them by 10^6 or converts them to episodes -- confirm the label is intended.
    plt.xlabel('Training Episodes $(\\times10^6)$', fontsize=22)
    plt.ylabel('Average return', fontsize=22)
    plt.legend(frameon=True, fancybox=True, prop={'weight':'bold', 'size':14}, loc="best")

    plt.setp(ax.get_xticklabels(), fontsize=16)
    plt.setp(ax.get_yticklabels(), fontsize=16)
    sns.despine()
    plt.tight_layout()
    plt.show()

# Plot the smoothed "train/return" curve (plot_vanilla's default tag) for the
# same hard-coded log directory used above.
# NOTE: the unconditional `exit()` earlier in this file means this call is
# never reached when the script runs top to bottom.
plot_vanilla('/media/vedant/cpsDataStorageWK/Vedant/tia_logs/dmc_cheetah_run_driving/tia/')


# Legacy flat-script version of the EMA mean/std plot, kept for reference only.
# It is wrapped in a bare string literal, so none of it is executed. Its
# `smoothing` value is passed to `ewm` as `alpha`.
"""
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Set the path to the JSON file
parent_dir = '/media/vedant/cpsDataStorageWK/Vedant/tia_logs/dmc_cheetah_run_driving/tia/'

# Specific tag to filter
tag_filter = "train/return"

# Collect data from all JSON files
data = []

# Smoothing values
smoothing = 0.001  # Change num to set the number of smoothing values

# List to store all EMAs
emas = []

# Traversing through each subfolder in the parent directory
for subfolder in os.listdir(parent_dir):
    json_dir = os.path.join(parent_dir, subfolder)
    if not os.path.isdir(json_dir):
        continue
    # Read each JSON file separately
    for json_file in os.listdir(json_dir):
        if not json_file.endswith('.jsonl'):
            continue
        # Read the data from the JSON file
        df = pd.read_json(os.path.join(json_dir, json_file), lines=True)

        # Check if tag_filter exists in DataFrame
        if tag_filter not in df.columns:
            continue

        df = df[['step', tag_filter]].sort_values(by='step')

        # Calculate exponential moving average for the smoothing value
        df['EMA'] = df[tag_filter].ewm(alpha=smoothing, adjust=False).mean()

        # Append the EMA DataFrame to the emas list
        emas.append(df)

# Concatenate all EMAs into a single DataFrame and calculate mean and standard deviation
all_emas = pd.concat(emas).groupby('step')['EMA']
mean_emas = all_emas.mean()
std_emas = all_emas.std()

# Plot mean and standard deviation of EMAs
plt.figure(figsize=(10, 6))
plt.plot(mean_emas.index, mean_emas)
plt.fill_between(std_emas.index, (mean_emas-std_emas), (mean_emas+std_emas), color='b', alpha=.1)
plt.legend()
plt.show()
"""