diff --git a/plotter/reward_plotter_final.py b/plotter/reward_plotter_final.py index cf2b938..2bbd9f5 100644 --- a/plotter/reward_plotter_final.py +++ b/plotter/reward_plotter_final.py @@ -16,8 +16,13 @@ def plot_csv_with_titles(paths_dict, x_axis, y_axis, subplot_titles, y_limits=(0 for path_, color in file_list: data = np.genfromtxt(path_, delimiter=',', skip_header=0, dtype=float) - mean = np.mean(data, axis=1) - std = np.std(data, axis=1) + if data.shape[0]> data.shape[1]: + mean = np.mean(data, axis=1) + std = np.std(data, axis=1) + else: + mean = np.mean(data, axis=0) + std = np.std(data, axis=0) + x = np.linspace(0, mean.shape[0], mean.shape[0]) axs[idx].plot(x, mean, color=color) @@ -46,21 +51,49 @@ def plot_csv_with_titles(paths_dict, x_axis, y_axis, subplot_titles, y_limits=(0 if __name__ == '__main__': - filepaths = [ - "/mnt/data/cp-ei-regular-20_0-15-1686582970_1112866.csv", - "/mnt/data/cp-ei-random-0_95-15-1686579274_2881138.csv", - "/mnt/data/cp-cb-random-1_0-15-1686575989_8880587.csv", - "/mnt/data/cp-pi-random-1_0-15-1686575712_588163.csv" + home_dir = os.path.expanduser('~') + file_path = os.path.join(home_dir, 'Documents/IntRLResults/CP-Results') + + filenames = [ + 'cp-e150r10-bf15-base/cp-ei-random-1_0-15-1690282051_2959082.csv', + 'cp-e150r10-bf15-noshaping/cp-pei-random-0_95-15-1690276946_1944933.csv', + 'cp-e150r10-bf15-noshaping/cp-pei-regular-25_0-15-1690290021_6843266.csv', + 'cp-e150r10-bf15-noshaping/cp-pei-improvement-0_1-15-1690292664_0382216.csv', + 'cp-e150r10-bf15-shaping/cp-ei-random-0_95-15-1690451164_0115042.csv', + 'cp-e150r10-bf15-shaping/cp-ei-regular-25_0-15-1690456185_1115792.csv', + 'cp-e150r10-bf15-shaping/cp-ei-improvement-0_1-15-1690465143_0114875.csv', + 'cp-e150r10-bf15-shaping/cp-pei-random-0_95-15-1690467921_7118568.csv', + 'cp-e150r10-bf15-shaping/cp-pei-regular-25_0-15-1690470012_0117908.csv', + 'cp-e150r10-bf15-shaping/cp-pei-improvement-0_1-15-1690472449_4115295.csv', ] + filepaths = [os.path.join(file_path, filename) for filename in filenames] # Demonstrating the adjusted function with subplot titles - titles = ["Plot 1", "Plot 2", "Plot 3", "Plot 4"] + titles = ["Preference", "Shaping", "Combination", "Regular"] data_dict_colored = { - 'subplot1': [(filepaths[0], 'blue'), (filepaths[1], 'green')], - 'subplot2': [(filepaths[2], 'red')], - 'subplot3': [(filepaths[3], 'purple')], - 'subplot4': [] + 'subplot1': [(filepaths[0], 'C0'), (filepaths[1], 'C1'), (filepaths[2], 'C2'), (filepaths[3], 'C3')], + 'subplot2': [(filepaths[0], 'C0'), (filepaths[4], 'C1'), (filepaths[5], 'C2'), (filepaths[6], 'C3')], + 'subplot3': [(filepaths[0], 'C0'), (filepaths[7], 'C1'), (filepaths[8], 'C2'), (filepaths[9], 'C3')], + 'subplot4': [(filepaths[0], 'C0'), (filepaths[2], 'C6'), (filepaths[5], 'C8'), (filepaths[8], 'C5')], } plot_csv_with_titles(data_dict_colored, 'Episodes', 'Reward', titles) + + file_path_reacher = os.path.join(home_dir, 'Documents/IntRLResults/RE-Results') + filenames_reacher = ['base_line/re-ei-random-1_0-5-1694370994_0363934.csv', + 'shaping/re-ei-random-1_0-10-1694359559_616903.csv', + 'shaping/re-ei-regular-10_0-5-1694371946_5364418.csv' + ] + + filepaths_reacher = [os.path.join(file_path_reacher, filename) for filename in filenames_reacher] + + titles_reacher = ["Shaping"] + + data_dict_reacher = { + 'subplot1': [(filepaths_reacher[0], 'C0'), (filepaths_reacher[1], 'C1'), (filepaths_reacher[2], 'C2')] + } + + plot_csv_with_titles(data_dict_reacher, 'Episodes', 'Reward', titles_reacher, y_limits=(-150, 50)) + + diff --git a/runner/BODmRunner.py b/runner/BODmRunner.py index a507a9b..829c4e8 100644 --- a/runner/BODmRunner.py +++ b/runner/BODmRunner.py @@ -18,14 +18,15 @@ warnings.filterwarnings("ignore", category=ConvergenceWarning) seed = None random_state = np.random.RandomState(seed=seed) -env = suite.load('reacher', 'hard', task_kwargs={'random': random_state}) +env = suite.load('finger', 'turn_easy', task_kwargs={'random': random_state}) spec = env.action_spec() +print(spec) time_step = env.reset() nr_steps = 100 -nr_runs = 10 +nr_runs = 1 nr_dims = spec.shape[0] -iteration_steps = 50 +iteration_steps = 10 acquisition_fun = "Expected Improvement" nr_weights = 15 @@ -63,6 +64,7 @@ def runner(env_, policy_): for step in range(nr_steps): action = policy_[step] output = env_.step(action) + print(output.reward) if output.reward != 0: reward += output.reward * 10