188 lines
6.6 KiB
Python
188 lines
6.6 KiB
Python
import numpy as np
|
|
from scipy.optimize import minimize
|
|
from sklearn.gaussian_process import GaussianProcessRegressor
|
|
from sklearn.gaussian_process.kernels import Matern
|
|
import matplotlib.pyplot as plt
|
|
from mpl_toolkits.mplot3d import Axes3D
|
|
from AcquistionFunctions.ExpectedImprovementwithPreference import expected_improvement
|
|
|
|
from sklearn.exceptions import ConvergenceWarning
|
|
import warnings
|
|
|
|
warnings.filterwarnings("ignore", category=ConvergenceWarning)
|
|
|
|
|
|
|
|
# Define the Branin function
|
|
def branin(X):
|
|
x, y = X
|
|
result = (y - (5.1 / (4*np.pi**2)) * x**2 +
|
|
5 * x / np.pi - 6)**2 + 10 * (1 - 1 / (8*np.pi)) * np.cos(x) + 10
|
|
return result
|
|
|
|
|
|
# Define the Rosenbrock function
|
|
def rosenbrock(X):
|
|
x, y = X
|
|
return (1 - x)**2 + 100 * (y - x**2)**2
|
|
|
|
|
|
# Function to plot the Test function and the samples
|
|
def plot_function_and_samples(func, X_samples, Y_samples, iteration):
|
|
fig = plt.figure(figsize=(10, 8))
|
|
ax = fig.add_subplot(111, projection='3d')
|
|
|
|
# Create a grid of points
|
|
x = np.linspace(-2, 2, 100)
|
|
y = np.linspace(-1, 3, 100)
|
|
X, Y = np.meshgrid(x, y)
|
|
Z = func([X, Y])
|
|
|
|
# Plot the function
|
|
ax.plot_surface(X, Y, Z, rstride=1, cstride=1, color='b', alpha=0.2, linewidth=0)
|
|
|
|
# Plot the samples
|
|
ax.scatter(X_samples[:, 0], X_samples[:, 1], Y_samples, color='r', s=50)
|
|
|
|
ax.set_title(f"Iteration {iteration}")
|
|
ax.set_xlabel('X')
|
|
ax.set_ylabel('Y')
|
|
ax.set_zlabel('f(X, Y)')
|
|
ax.view_init(elev=30, azim=-30)
|
|
|
|
plt.show()
|
|
|
|
def bayesian_optimization(n_iter, change_to_gaussian_after=10):
|
|
# # The bounds of the function
|
|
# bounds = np.array([[-5.0, 10.0], [0.0, 15.0]]) #brainin
|
|
bounds = np.array([[-2.0, 2.0], [-1.0, 3.0]]) #rosenbrock
|
|
|
|
# Initial samples
|
|
X_init = np.random.uniform([b[0] for b in bounds], [b[1] for b in bounds], size=(2, len(bounds)))
|
|
Y_init = np.array([[rosenbrock(x)] for x in X_init])
|
|
|
|
# Gaussian process with Matérn kernel as surrogate model
|
|
m52 = Matern(length_scale=[1.0, 1.0], nu=2.5)
|
|
gp = GaussianProcessRegressor(kernel=m52, alpha=1e-5)
|
|
|
|
# Boolean array indicating which dimensions should be sampled from a Gaussian distribution
|
|
use_gaussian = np.array([False, False])
|
|
|
|
# Means and variances for the Gaussian distributions
|
|
means = np.array([0.0, 0.0])
|
|
variances = np.array([(b[1] - b[0]) * 0.25 for b in bounds])**2
|
|
|
|
# Array to store the best result from each iteration
|
|
best_results = np.empty(n_iter)
|
|
|
|
# Bayesian optimization loop
|
|
for i_ in range(n_iter):
|
|
# Update Gaussian process with existing samples
|
|
gp.fit(X_init, Y_init)
|
|
|
|
# Obtain next sampling point from the acquisition function (expected_improvement)
|
|
n_samples = 1000
|
|
X_next = expected_improvement(X_init, n_samples, gp, use_gaussian, means, variances)
|
|
|
|
# Obtain next noisy sample from the objective function
|
|
Y_next = rosenbrock(X_next)
|
|
|
|
# Add sample to previous samples
|
|
X_init = np.vstack((X_init, X_next))
|
|
Y_init = np.vstack((Y_init, Y_next))
|
|
|
|
# After change_to_gaussian_after iterations, start using Gaussian distribution for sampling
|
|
if i_ >= change_to_gaussian_after:
|
|
use_gaussian = np.array([True, True])
|
|
# Update means with the mean of the best 5 results
|
|
best_5_idx = np.argsort(Y_init, axis=0)[:5].flatten()
|
|
means = np.mean(X_init[best_5_idx], axis=0)
|
|
# Reduce variances by 5 percent
|
|
variances *= 0.95
|
|
|
|
# Store the best result from this iteration
|
|
best_results[i_] = np.min(Y_init)
|
|
|
|
return best_results
|
|
|
|
|
|
# Run the benchmarking test and store the results
|
|
results_with_gaussian = np.empty((50, 20))
|
|
results_without_gaussian = np.empty((50, 20))
|
|
|
|
for i in range(50):
|
|
results_with_gaussian[i, :] = bayesian_optimization(20, 10)
|
|
results_without_gaussian[i, :] = bayesian_optimization(20, 50)
|
|
|
|
# Compute the mean and standard deviation of the results
|
|
mean_with_gaussian = np.mean(results_with_gaussian, axis=0)
|
|
std_with_gaussian = np.std(results_with_gaussian, axis=0)
|
|
mean_without_gaussian = np.mean(results_without_gaussian, axis=0)
|
|
std_without_gaussian = np.std(results_without_gaussian, axis=0)
|
|
|
|
# Plot the results
|
|
plt.figure(figsize=(10, 6))
|
|
plt.plot(mean_with_gaussian, label='With Gaussian after 10 iterations')
|
|
plt.fill_between(range(20), mean_with_gaussian-std_with_gaussian, mean_with_gaussian+std_with_gaussian, alpha=0.2)
|
|
plt.plot(mean_without_gaussian, label='Without Gaussian')
|
|
plt.fill_between(range(20), mean_without_gaussian-std_without_gaussian, mean_without_gaussian+std_without_gaussian, alpha=0.2)
|
|
plt.xlabel('Iteration')
|
|
plt.ylabel('Best Result')
|
|
plt.legend()
|
|
plt.show()
|
|
|
|
|
|
#
|
|
# # Initial samples
|
|
# X_init = np.random.uniform([b[0] for b in bounds], [b[1] for b in bounds], size=(2, len(bounds)))
|
|
# # Y_init = np.array([[branin(x)] for x in X_init])
|
|
# Y_init = np.array([[rosenbrock(x)] for x in X_init])
|
|
#
|
|
# # Gaussian process with Matérn kernel as surrogate model
|
|
# m52 = Matern(length_scale=[1.0, 1.0], nu=2.5)
|
|
# gp = GaussianProcessRegressor(kernel=m52, alpha=1e-5)
|
|
#
|
|
# # Number of iterations
|
|
# n_iter = 50
|
|
#
|
|
# # Boolean array indicating which dimensions should be sampled from a Gaussian distribution
|
|
# use_gaussian = np.array([False, False])
|
|
#
|
|
# # Means and variances for the Gaussian distributions
|
|
# means = np.array([0.0, 0.0])
|
|
# variances = np.array([(b[1] - b[0]) * 0.25 for b in bounds])**2
|
|
#
|
|
# # Bayesian optimization loop
|
|
# for i in range(n_iter):
|
|
# # Update Gaussian process with existing samples
|
|
# gp.fit(X_init, Y_init)
|
|
#
|
|
# # Obtain next sampling point from the acquisition function (expected_improvement)
|
|
# n_samples = 1000
|
|
# X_next = expected_improvement(X_init, n_samples, gp, use_gaussian, means, variances)
|
|
#
|
|
# # Obtain next noisy sample from the objective function
|
|
# # Y_next = branin(X_next)
|
|
# Y_next = rosenbrock(X_next)
|
|
#
|
|
# # Add sample to previous samples
|
|
# X_init = np.vstack((X_init, X_next))
|
|
# Y_init = np.vstack((Y_init, Y_next))
|
|
#
|
|
#
|
|
# # After 10 iterations, start using Gaussian distribution for sampling
|
|
# if i >= 29:
|
|
# use_gaussian = np.array([True, True])
|
|
# # Update means with the mean of the best 5 results
|
|
# best_5_idx = np.argsort(Y_init, axis=0)[:5].flatten()
|
|
# means = np.mean(X_init[best_5_idx], axis=0)
|
|
# # Reduce variances by 10 percent
|
|
# variances *= 0.9
|
|
#
|
|
# # Plot the function and the samples
|
|
# # plot_function_and_samples(rosenbrock, X_init, Y_init, i)
|
|
#
|
|
# print("Final samples: ", X_init)
|
|
# print("Final function values: ", Y_init)
|
|
# print(f"Minimum function values: {np.min(Y_init)}, idx: {np.argmin(Y_init)}")
|