DeepMind suite tested

2023-09-10 20:54:34 +02:00 · 2023-09-10 20:54:34 +02:00 · ba31dfd7f7
commit ba31dfd7f7
parent 82d2b61e6b
2 changed files with 50 additions and 15 deletions
--- a/plotter/reward_plotter_final.py
+++ b/plotter/reward_plotter_final.py
@@ -16,8 +16,13 @@ def plot_csv_with_titles(paths_dict, x_axis, y_axis, subplot_titles, y_limits=(0
        for path_, color in file_list:
            data = np.genfromtxt(path_, delimiter=',', skip_header=0, dtype=float)
-            mean = np.mean(data, axis=1)
+            if data.shape[0]> data.shape[1]:
-            std = np.std(data, axis=1)
+                mean = np.mean(data, axis=1)
                std = np.std(data, axis=1)
            else:
                mean = np.mean(data, axis=0)
                std = np.std(data, axis=0)
            x = np.linspace(0, mean.shape[0], mean.shape[0])
            axs[idx].plot(x, mean, color=color)
@@ -46,21 +51,49 @@ def plot_csv_with_titles(paths_dict, x_axis, y_axis, subplot_titles, y_limits=(0
 if __name__ == '__main__':
-    filepaths = [
+    home_dir = os.path.expanduser('~')
-        "/mnt/data/cp-ei-regular-20_0-15-1686582970_1112866.csv",
+    file_path = os.path.join(home_dir, 'Documents/IntRLResults/CP-Results')
-        "/mnt/data/cp-ei-random-0_95-15-1686579274_2881138.csv",
+
-        "/mnt/data/cp-cb-random-1_0-15-1686575989_8880587.csv",
+    filenames = [
-        "/mnt/data/cp-pi-random-1_0-15-1686575712_588163.csv"
+        'cp-e150r10-bf15-base/cp-ei-random-1_0-15-1690282051_2959082.csv',
        'cp-e150r10-bf15-noshaping/cp-pei-random-0_95-15-1690276946_1944933.csv',
        'cp-e150r10-bf15-noshaping/cp-pei-regular-25_0-15-1690290021_6843266.csv',
        'cp-e150r10-bf15-noshaping/cp-pei-improvement-0_1-15-1690292664_0382216.csv',
        'cp-e150r10-bf15-shaping/cp-ei-random-0_95-15-1690451164_0115042.csv',
        'cp-e150r10-bf15-shaping/cp-ei-regular-25_0-15-1690456185_1115792.csv',
        'cp-e150r10-bf15-shaping/cp-ei-improvement-0_1-15-1690465143_0114875.csv',
        'cp-e150r10-bf15-shaping/cp-pei-random-0_95-15-1690467921_7118568.csv',
        'cp-e150r10-bf15-shaping/cp-pei-regular-25_0-15-1690470012_0117908.csv',
        'cp-e150r10-bf15-shaping/cp-pei-improvement-0_1-15-1690472449_4115295.csv',
    ]
    filepaths = [os.path.join(file_path, filename) for filename in filenames]
    # Demonstrating the adjusted function with subplot titles
-    titles = ["Plot 1", "Plot 2", "Plot 3", "Plot 4"]
+    titles = ["Preference", "Shaping", "Combination", "Regular"]
    data_dict_colored = {
-        'subplot1': [(filepaths[0], 'blue'), (filepaths[1], 'green')],
+        'subplot1': [(filepaths[0], 'C0'), (filepaths[1], 'C1'), (filepaths[2], 'C2'), (filepaths[3], 'C3')],
-        'subplot2': [(filepaths[2], 'red')],
+        'subplot2': [(filepaths[0], 'C0'), (filepaths[4], 'C1'), (filepaths[5], 'C2'), (filepaths[6], 'C3')],
-        'subplot3': [(filepaths[3], 'purple')],
+        'subplot3': [(filepaths[0], 'C0'), (filepaths[7], 'C1'), (filepaths[8], 'C2'), (filepaths[9], 'C3')],
-        'subplot4': []
+        'subplot4': [(filepaths[0], 'C0'), (filepaths[2], 'C6'), (filepaths[5], 'C8'), (filepaths[8], 'C5')],
    }
    plot_csv_with_titles(data_dict_colored, 'Episodes', 'Reward', titles)
    file_path_reacher = os.path.join(home_dir, 'Documents/IntRLResults/RE-Results')
    filenames_reacher = ['base_line/re-ei-random-1_0-5-1694370994_0363934.csv',
                         'shaping/re-ei-random-1_0-10-1694359559_616903.csv',
                         'shaping/re-ei-regular-10_0-5-1694371946_5364418.csv'
                         ]
    filepaths_reacher = [os.path.join(file_path_reacher, filename) for filename in filenames_reacher]
    titles_reacher = ["Shaping"]
    data_dict_reacher = {
        'subplot1': [(filepaths_reacher[0], 'C0'), (filepaths_reacher[1], 'C1'), (filepaths_reacher[2], 'C2')]
    }
    plot_csv_with_titles(data_dict_reacher, 'Episodes', 'Reward', titles_reacher, y_limits=(-150, 50))
--- a/runner/BODmRunner.py
+++ b/runner/BODmRunner.py
@@ -18,14 +18,15 @@ warnings.filterwarnings("ignore", category=ConvergenceWarning)
 seed = None
 random_state = np.random.RandomState(seed=seed)
-env = suite.load('reacher', 'hard', task_kwargs={'random': random_state})
+env = suite.load('finger', 'turn_easy', task_kwargs={'random': random_state})
 spec = env.action_spec()
 print(spec)
 time_step = env.reset()
 nr_steps = 100
-nr_runs = 10
+nr_runs = 1
 nr_dims = spec.shape[0]
-iteration_steps = 50
+iteration_steps = 10
 acquisition_fun = "Expected Improvement"
 nr_weights = 15
@@ -63,6 +64,7 @@ def runner(env_, policy_):
    for step in range(nr_steps):
        action = policy_[step]
        output = env_.step(action)
        print(output.reward)
        if output.reward != 0:
            reward += output.reward * 10