diff --git a/plotter/reward_plotter.py b/plotter/reward_plotter.py
new file mode 100644
index 0000000..3b546ea
--- /dev/null
+++ b/plotter/reward_plotter.py
@@ -0,0 +1,66 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import os
+
+def plot_csv(paths, x_axis, y_axis):
+    """Plot the row-wise mean of each CSV file with a 1.96 * std band.
+
+    Every file is read with its first line skipped as a header. Each
+    remaining row is reduced to its mean and standard deviation across
+    columns and drawn against its row index. All files share one figure,
+    which is shown once after the last file has been drawn.
+
+    Args:
+        paths: CSV files to plot. Each basename must start with
+            ``NAME-FLOAT-INT-`` where FLOAT uses ``_`` as the decimal
+            point; these three fields form the legend label.
+        x_axis: Label of the x axis.
+        y_axis: Label of the y axis.
+
+    Raises:
+        ValueError: If ``paths`` is empty.
+    """
+    longest = 0
+    for path_ in paths:
+        data = np.genfromtxt(path_, delimiter=',', skip_header=1, dtype=float)
+
+        mean = np.mean(data, axis=1)
+        std = np.std(data, axis=1)
+
+        # One x position per row, so that row i is drawn at x == i.
+        x = np.arange(mean.shape[0])
+        longest = max(longest, mean.shape[0])
+
+        # Extract the first part of the filename and use it as a label
+        label = os.path.basename(path_).split('-')[0:3]
+        label = f"{label[0]} {float(label[1].replace('_','.'))}, nrbfs = {int(label[2])}"
+
+        plt.plot(x, mean, label=label)
+        plt.fill_between(
+            x,
+            mean - 1.96 * std,
+            mean + 1.96 * std,
+            alpha=0.5
+        )
+
+    # Nothing was drawn, so there is no curve to label or show.
+    if longest == 0:
+        raise ValueError("paths must contain at least one CSV file")
+
+    plt.xlabel(x_axis)
+    # Size the axis for the longest series so that no curve is clipped.
+    plt.xlim([0, longest])
+    plt.ylabel(y_axis)
+    plt.grid(True)
+    plt.legend(loc="best")
+    plt.show()
+
+
+if __name__ == '__main__':
+
+    filenames = ['random-1_0-5-1685552722_2243946.csv']
+    home_dir = os.path.expanduser('~')
+    file_path = os.path.join(home_dir, 'Documents/IntRLResults')
+    paths = [os.path.join(file_path, filename) for filename in filenames]
+    plot_csv(paths, 'Episodes', 'Reward')
diff --git a/runner/BOGymRunner.py b/runner/BOGymRunner.py
index c7ae320..524c5b8 100644
--- a/runner/BOGymRunner.py
+++ b/runner/BOGymRunner.py
@@ -5,7 +5,8 @@ import numpy as np
 import matplotlib.pyplot as plt
 
 # from ToyTask.MountainCarGym import Continuous_MountainCarEnv
-from ToyTask.Pendulum import PendulumEnv
+# from ToyTask.Pendulum import PendulumEnv
+from ToyTask.Cartpole import CartPoleEnv
 import warnings
 
 from sklearn.exceptions import ConvergenceWarning
@@ -13,12 +14,12 @@ from sklearn.exceptions import ConvergenceWarning
 warnings.filterwarnings("ignore", category=ConvergenceWarning)
 
 # BO parameters
-env = PendulumEnv()
+env = CartPoleEnv()
 nr_steps = 100
 acquisition_fun = 'ei'
-iteration_steps = 100
+iteration_steps = 50
 
-nr_runs = 100
+nr_runs = 10
 
 # storage arrays
 finished_store = np.zeros((1, nr_runs))
@@ -82,7 +83,7 @@ def main():
     global finished_store, best_policy, reward_store
     bo = BayesianOptimization(env, nr_steps, acq=acquisition_fun)
     for i in range(nr_runs):
-        print('Iteration:', str(i))
+        print('Runs:', str(i))
         bo.env_seed = int(np.random.randint(1, 2147483647, 1)[0])
         bo.initialize()
         for j in range(iteration_steps):