BOTorch working
This commit is contained in:
parent
72f3e4e361
commit
4fe3973a53
@ -187,6 +187,7 @@ class BayesianOptimization:
|
||||
def get_best_result(self, plotter=True):
|
||||
y_hat = self.gp.predict(self.X)
|
||||
idx = np.argmax(y_hat)
|
||||
print(idx, np.argmax(self.Y))
|
||||
x_max = self.X[idx, :]
|
||||
self.policy_model.weights = x_max
|
||||
self.policy_model.policy_rollout()
|
||||
@ -199,7 +200,7 @@ class BayesianOptimization:
|
||||
def main():
|
||||
nr_steps = 100
|
||||
env = Continuous_MountainCarEnv() # render_mode='human'
|
||||
bo = BayesianOptimization(env, nr_steps, acq='ei')
|
||||
bo = BayesianOptimization(env, nr_steps, nr_weights=10, acq='ei')
|
||||
bo.initialize()
|
||||
iteration_steps = 200
|
||||
for i in range(iteration_steps):
|
||||
|
@ -2,7 +2,7 @@ import numpy as np
|
||||
import torch
|
||||
from botorch.models import SingleTaskGP
|
||||
from botorch.optim import optimize_acqf
|
||||
from gpytorch.kernels import MaternKernel
|
||||
from gpytorch.kernels import MaternKernel, RBFKernel
|
||||
from botorch.fit import fit_gpytorch_mll
|
||||
from gpytorch.mlls import ExactMarginalLogLikelihood
|
||||
|
||||
@ -52,7 +52,7 @@ class BayesianOptimization:
|
||||
self.upper_bound)
|
||||
|
||||
self.eval_X = 200
|
||||
self.eval_restarts = 5
|
||||
self.eval_restarts = 10
|
||||
|
||||
def reset_bo(self):
|
||||
self.counter_array = np.empty((1, 1))
|
||||
@ -104,7 +104,7 @@ class BayesianOptimization:
|
||||
|
||||
Y = torch.tensor(self.Y_np)
|
||||
|
||||
self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=MaternKernel(nu=1.5))
|
||||
self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=RBFKernel())
|
||||
mll = ExactMarginalLogLikelihood(self.GP.likelihood, self.GP)
|
||||
fit_gpytorch_mll(mll)
|
||||
|
||||
@ -153,7 +153,7 @@ class BayesianOptimization:
|
||||
|
||||
Y = torch.tensor(self.Y_np)
|
||||
|
||||
self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=MaternKernel(nu=1.5))
|
||||
self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=RBFKernel())
|
||||
mll = ExactMarginalLogLikelihood(self.GP.likelihood, self.GP)
|
||||
fit_gpytorch_mll(mll)
|
||||
|
||||
@ -177,30 +177,37 @@ class BayesianOptimization:
|
||||
self.episode += 1
|
||||
|
||||
def get_best_result(self):
|
||||
Y = torch.tensor(self.Y_np)
|
||||
self.GP = SingleTaskGP(train_X=self.X, train_Y=Y, covar_module=RBFKernel())
|
||||
mll = ExactMarginalLogLikelihood(self.GP.likelihood, self.GP)
|
||||
fit_gpytorch_mll(mll)
|
||||
|
||||
y_hat = self.GP.posterior(self.X)
|
||||
idx = torch.argmax(y_hat)
|
||||
idx = torch.argmax(y_hat.mean)
|
||||
x_max = self.X[idx, :].detach().numpy()
|
||||
|
||||
print(idx, np.argmax(self.Y_np))
|
||||
|
||||
self.policy_model.weights = x_max
|
||||
best_policy = self.policy_model.policy_rollout().reshape(-1, )
|
||||
|
||||
return best_policy, y_hat[idx].detach().numpy(), x_max
|
||||
return best_policy, y_hat.mean[idx].detach().numpy(), x_max
|
||||
|
||||
|
||||
def main():
|
||||
nr_steps = 100
|
||||
env = Continuous_MountainCarEnv() # render_mode='human'
|
||||
bo = BayesianOptimization(env, nr_steps, nr_weights=5, acq="Expected Improvement")
|
||||
bo = BayesianOptimization(env, nr_steps, nr_weights=10, acq="Expected Improvement")
|
||||
bo.initialize()
|
||||
iteration_steps = 200
|
||||
iteration_steps = 500
|
||||
for i in range(iteration_steps):
|
||||
x_next = bo.next_observation()
|
||||
step_count = bo.eval_new_observation(x_next)
|
||||
|
||||
print(bo.episode, bo.best_reward[-1][0], step_count)
|
||||
print(bo.episode, bo.best_reward[-1][0], bo.Y_np[-1][0], step_count)
|
||||
|
||||
print(bo.Y_np)
|
||||
print(bo.X_np)
|
||||
_, a, _ =bo.get_best_result()
|
||||
print(a)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
@ -165,8 +165,8 @@ class Continuous_MountainCarEnv(gym.Env):
|
||||
|
||||
reward = 0
|
||||
if terminated:
|
||||
reward += 10
|
||||
reward -= math.pow(action[0], 2) * 0.1
|
||||
reward += 50
|
||||
reward -= math.pow(action[0], 2)
|
||||
reward -= 1
|
||||
|
||||
self.state = np.array([position, velocity], dtype=np.float32)
|
||||
|
Loading…
Reference in New Issue
Block a user