DeepMind suite tested

This commit is contained in:
Niko Feith 2023-09-10 20:54:34 +02:00
parent 82d2b61e6b
commit ba31dfd7f7
2 changed files with 50 additions and 15 deletions

View File

@ -16,8 +16,13 @@ def plot_csv_with_titles(paths_dict, x_axis, y_axis, subplot_titles, y_limits=(0
for path_, color in file_list: for path_, color in file_list:
data = np.genfromtxt(path_, delimiter=',', skip_header=0, dtype=float) data = np.genfromtxt(path_, delimiter=',', skip_header=0, dtype=float)
mean = np.mean(data, axis=1) if data.shape[0]> data.shape[1]:
std = np.std(data, axis=1) mean = np.mean(data, axis=1)
std = np.std(data, axis=1)
else:
mean = np.mean(data, axis=0)
std = np.std(data, axis=0)
x = np.linspace(0, mean.shape[0], mean.shape[0]) x = np.linspace(0, mean.shape[0], mean.shape[0])
axs[idx].plot(x, mean, color=color) axs[idx].plot(x, mean, color=color)
@ -46,21 +51,49 @@ def plot_csv_with_titles(paths_dict, x_axis, y_axis, subplot_titles, y_limits=(0
if __name__ == '__main__': if __name__ == '__main__':
filepaths = [ home_dir = os.path.expanduser('~')
"/mnt/data/cp-ei-regular-20_0-15-1686582970_1112866.csv", file_path = os.path.join(home_dir, 'Documents/IntRLResults/CP-Results')
"/mnt/data/cp-ei-random-0_95-15-1686579274_2881138.csv",
"/mnt/data/cp-cb-random-1_0-15-1686575989_8880587.csv", filenames = [
"/mnt/data/cp-pi-random-1_0-15-1686575712_588163.csv" 'cp-e150r10-bf15-base/cp-ei-random-1_0-15-1690282051_2959082.csv',
'cp-e150r10-bf15-noshaping/cp-pei-random-0_95-15-1690276946_1944933.csv',
'cp-e150r10-bf15-noshaping/cp-pei-regular-25_0-15-1690290021_6843266.csv',
'cp-e150r10-bf15-noshaping/cp-pei-improvement-0_1-15-1690292664_0382216.csv',
'cp-e150r10-bf15-shaping/cp-ei-random-0_95-15-1690451164_0115042.csv',
'cp-e150r10-bf15-shaping/cp-ei-regular-25_0-15-1690456185_1115792.csv',
'cp-e150r10-bf15-shaping/cp-ei-improvement-0_1-15-1690465143_0114875.csv',
'cp-e150r10-bf15-shaping/cp-pei-random-0_95-15-1690467921_7118568.csv',
'cp-e150r10-bf15-shaping/cp-pei-regular-25_0-15-1690470012_0117908.csv',
'cp-e150r10-bf15-shaping/cp-pei-improvement-0_1-15-1690472449_4115295.csv',
] ]
filepaths = [os.path.join(file_path, filename) for filename in filenames]
# Demonstrating the adjusted function with subplot titles # Demonstrating the adjusted function with subplot titles
titles = ["Plot 1", "Plot 2", "Plot 3", "Plot 4"] titles = ["Preference", "Shaping", "Combination", "Regular"]
data_dict_colored = { data_dict_colored = {
'subplot1': [(filepaths[0], 'blue'), (filepaths[1], 'green')], 'subplot1': [(filepaths[0], 'C0'), (filepaths[1], 'C1'), (filepaths[2], 'C2'), (filepaths[3], 'C3')],
'subplot2': [(filepaths[2], 'red')], 'subplot2': [(filepaths[0], 'C0'), (filepaths[4], 'C1'), (filepaths[5], 'C2'), (filepaths[6], 'C3')],
'subplot3': [(filepaths[3], 'purple')], 'subplot3': [(filepaths[0], 'C0'), (filepaths[7], 'C1'), (filepaths[8], 'C2'), (filepaths[9], 'C3')],
'subplot4': [] 'subplot4': [(filepaths[0], 'C0'), (filepaths[2], 'C6'), (filepaths[5], 'C8'), (filepaths[8], 'C5')],
} }
plot_csv_with_titles(data_dict_colored, 'Episodes', 'Reward', titles) plot_csv_with_titles(data_dict_colored, 'Episodes', 'Reward', titles)
file_path_reacher = os.path.join(home_dir, 'Documents/IntRLResults/RE-Results')
filenames_reacher = ['base_line/re-ei-random-1_0-5-1694370994_0363934.csv',
'shaping/re-ei-random-1_0-10-1694359559_616903.csv',
'shaping/re-ei-regular-10_0-5-1694371946_5364418.csv'
]
filepaths_reacher = [os.path.join(file_path_reacher, filename) for filename in filenames_reacher]
titles_reacher = ["Shaping"]
data_dict_reacher = {
'subplot1': [(filepaths_reacher[0], 'C0'), (filepaths_reacher[1], 'C1'), (filepaths_reacher[2], 'C2')]
}
plot_csv_with_titles(data_dict_reacher, 'Episodes', 'Reward', titles_reacher, y_limits=(-150, 50))

View File

@ -18,14 +18,15 @@ warnings.filterwarnings("ignore", category=ConvergenceWarning)
seed = None seed = None
random_state = np.random.RandomState(seed=seed) random_state = np.random.RandomState(seed=seed)
env = suite.load('reacher', 'hard', task_kwargs={'random': random_state}) env = suite.load('finger', 'turn_easy', task_kwargs={'random': random_state})
spec = env.action_spec() spec = env.action_spec()
print(spec)
time_step = env.reset() time_step = env.reset()
nr_steps = 100 nr_steps = 100
nr_runs = 10 nr_runs = 1
nr_dims = spec.shape[0] nr_dims = spec.shape[0]
iteration_steps = 50 iteration_steps = 10
acquisition_fun = "Expected Improvement" acquisition_fun = "Expected Improvement"
nr_weights = 15 nr_weights = 15
@ -63,6 +64,7 @@ def runner(env_, policy_):
for step in range(nr_steps): for step in range(nr_steps):
action = policy_[step] action = policy_[step]
output = env_.step(action) output = env_.step(action)
print(output.reward)
if output.reward != 0: if output.reward != 0:
reward += output.reward * 10 reward += output.reward * 10