BOTorch working

commit 4fe3973a53
parent 72f3e4e361
@@ -187,6 +187,7 @@ class BayesianOptimization:
     def get_best_result(self, plotter=True):
         y_hat = self.gp.predict(self.X)
         idx = np.argmax(y_hat)
+        print(idx, np.argmax(self.Y))
         x_max = self.X[idx, :]
         self.policy_model.weights = x_max
         self.policy_model.policy_rollout()
@@ -199,7 +200,7 @@ class BayesianOptimization:
 def main():
     nr_steps = 100
     env = Continuous_MountainCarEnv()  # render_mode='human'
-    bo = BayesianOptimization(env, nr_steps, acq='ei')
+    bo = BayesianOptimization(env, nr_steps, nr_weights=10, acq='ei')
     bo.initialize()
     iteration_steps = 200
     for i in range(iteration_steps):
@@ -2,7 +2,7 @@ import numpy as np
 import torch
 from botorch.models import SingleTaskGP
 from botorch.optim import optimize_acqf
-from gpytorch.kernels import MaternKernel
+from gpytorch.kernels import MaternKernel, RBFKernel
 from botorch.fit import fit_gpytorch_mll
 from gpytorch.mlls import ExactMarginalLogLikelihood
 
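For orientation, here is a minimal, self-contained sketch of the BoTorch loop these imports support: fit a SingleTaskGP by maximizing the exact marginal log-likelihood, then maximize an acquisition function with optimize_acqf to pick the next query point. The toy objective, the 5-dimensional unit-cube bounds, and all variable names are illustrative placeholders, not the repository's actual setup.

import torch
from botorch.models import SingleTaskGP
from botorch.fit import fit_gpytorch_mll
from botorch.acquisition import ExpectedImprovement
from botorch.optim import optimize_acqf
from gpytorch.kernels import RBFKernel
from gpytorch.mlls import ExactMarginalLogLikelihood

# Toy training data: 10 candidate weight vectors in a 5-D unit cube.
train_X = torch.rand(10, 5, dtype=torch.double)
train_Y = -(train_X - 0.5).pow(2).sum(dim=-1, keepdim=True)  # dummy objective

# Fit a single-output GP with an RBF kernel by maximizing the marginal likelihood.
gp = SingleTaskGP(train_X=train_X, train_Y=train_Y, covar_module=RBFKernel())
mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
fit_gpytorch_mll(mll)

# Maximize Expected Improvement over the box bounds to get the next candidate.
ei = ExpectedImprovement(gp, best_f=train_Y.max())
bounds = torch.stack([torch.zeros(5), torch.ones(5)]).to(torch.double)
x_next, acq_value = optimize_acqf(ei, bounds=bounds, q=1, num_restarts=10, raw_samples=200)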
@@ -52,7 +52,7 @@ class BayesianOptimization:
                                         self.upper_bound)
 
         self.eval_X = 200
-        self.eval_restarts = 5
+        self.eval_restarts = 10
 
     def reset_bo(self):
         self.counter_array = np.empty((1, 1))
@@ -104,7 +104,7 @@ class BayesianOptimization:
 
         Y = torch.tensor(self.Y_np)
 
-        self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=MaternKernel(nu=1.5))
+        self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=RBFKernel())
         mll = ExactMarginalLogLikelihood(self.GP.likelihood, self.GP)
         fit_gpytorch_mll(mll)
 
@@ -153,7 +153,7 @@ class BayesianOptimization:
 
         Y = torch.tensor(self.Y_np)
 
-        self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=MaternKernel(nu=1.5))
+        self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=RBFKernel())
         mll = ExactMarginalLogLikelihood(self.GP.likelihood, self.GP)
         fit_gpytorch_mll(mll)
 
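A note on the kernel swap above: passing covar_module replaces SingleTaskGP's default covariance entirely. In many BoTorch versions the default is a Matern kernel with per-dimension (ARD) lengthscales wrapped in a ScaleKernel, so a bare RBFKernel() also drops the outputscale and the ARD lengthscales; check the installed version. A hedged sketch of both variants, with placeholder data and names:

import torch
from botorch.models import SingleTaskGP
from gpytorch.kernels import RBFKernel, ScaleKernel

X = torch.rand(20, 5, dtype=torch.double)
Y = X.sum(dim=-1, keepdim=True)  # placeholder targets

# As in this commit: a bare RBF kernel, single shared lengthscale, no outputscale.
gp_plain = SingleTaskGP(X, Y, covar_module=RBFKernel())

# Alternative that keeps an outputscale and per-dimension (ARD) lengthscales.
gp_ard = SingleTaskGP(X, Y, covar_module=ScaleKernel(RBFKernel(ard_num_dims=X.shape[-1])))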
@@ -177,30 +177,37 @@ class BayesianOptimization:
         self.episode += 1
 
     def get_best_result(self):
+        Y = torch.tensor(self.Y_np)
+        self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=RBFKernel())
+        mll = ExactMarginalLogLikelihood(self.GP.likelihood, self.GP)
+        fit_gpytorch_mll(mll)
+
         y_hat = self.GP.posterior(self.X)
-        idx = torch.argmax(y_hat)
+        idx = torch.argmax(y_hat.mean)
         x_max = self.X[idx, :].detach().numpy()
 
+        print(idx, np.argmax(self.Y_np))
+
         self.policy_model.weights = x_max
         best_policy = self.policy_model.policy_rollout().reshape(-1, )
 
-        return best_policy, y_hat[idx].detach().numpy(), x_max
+        return best_policy, y_hat.mean[idx].detach().numpy(), x_max
 
 
 def main():
     nr_steps = 100
     env = Continuous_MountainCarEnv()  # render_mode='human'
-    bo = BayesianOptimization(env, nr_steps, nr_weights=5, acq="Expected Improvement")
+    bo = BayesianOptimization(env, nr_steps, nr_weights=10, acq="Expected Improvement")
     bo.initialize()
-    iteration_steps = 200
+    iteration_steps = 500
     for i in range(iteration_steps):
         x_next = bo.next_observation()
         step_count = bo.eval_new_observation(x_next)
 
-        print(bo.episode, bo.best_reward[-1][0], step_count)
+        print(bo.episode, bo.best_reward[-1][0], bo.Y_np[-1][0], step_count)
 
-    print(bo.Y_np)
-    print(bo.X_np)
+    _, a, _ = bo.get_best_result()
+    print(a)
 
 
 if __name__ == "__main__":
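The updated get_best_result() refits the GP and picks the incumbent by the posterior mean over the already-evaluated inputs, rather than by the raw observed returns; smoothing over noisy rollouts is presumably why the printed GP argmax can differ from np.argmax(self.Y_np). A hedged, self-contained sketch of that selection step, with an illustrative function name:

import torch
from botorch.models import SingleTaskGP

def best_predicted(gp: SingleTaskGP, X: torch.Tensor):
    # Mirrors the pattern above: query the posterior at the evaluated inputs,
    # take the argmax of the posterior mean, and return that input as the
    # incumbent together with its predicted value.
    with torch.no_grad():
        mean = gp.posterior(X).mean.squeeze(-1)  # shape (n,)
        idx = torch.argmax(mean)
        return X[idx], mean[idx].item()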
@@ -165,8 +165,8 @@ class Continuous_MountainCarEnv(gym.Env):
 
         reward = 0
         if terminated:
-            reward += 10
-        reward -= math.pow(action[0], 2) * 0.1
+            reward += 50
+        reward -= math.pow(action[0], 2)
         reward -= 1
 
         self.state = np.array([position, velocity], dtype=np.float32)
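For clarity, the reward shaping in this hunk restated as a standalone helper; the function name is illustrative, and in the environment this logic lives inside step():

import math

def shaped_step_reward(action, terminated):
    # Terminal bonus raised from 10 to 50, full squared-action penalty
    # (the 0.1 factor was dropped), and a constant -1 time penalty per step.
    reward = 0.0
    if terminated:
        reward += 50
    reward -= math.pow(action[0], 2)
    reward -= 1
    return reward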