finished BO

Manual case and BO fully functional
Niko Feith 2023-03-08 16:27:14 +01:00
parent 33764e1ac3
commit 1416e72675
4 changed files with 31 additions and 18 deletions

View File

@@ -5,5 +5,6 @@ uint16 nr_runs
 string acquisition_function
 ---
 float32[] best_policy
+float32[] best_weights
 float32[] reward_mean
 float32[] reward_std

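For orientation, the full service definition after this change would look roughly like the sketch below. The request fields are inferred from how the service node reads the request further down in this commit (nr_weights, max_steps, nr_episodes, nr_runs, acquisition_function); the field types other than nr_runs are assumptions, since the hunk above only shows the tail of the request and the response.

# Assumed layout of the BO service file after this commit (request types guessed)
uint16 nr_weights
uint16 max_steps
uint16 nr_episodes
uint16 nr_runs
string acquisition_function
---
float32[] best_policy
float32[] best_weights
float32[] reward_mean
float32[] reward_std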
View File

@@ -7,6 +7,7 @@ from active_bo_ros.AcquisitionFunctions.ExpectedImprovement import ExpectedImprovement
 from active_bo_ros.AcquisitionFunctions.ProbabilityOfImprovement import ProbabilityOfImprovement
 from active_bo_ros.AcquisitionFunctions.ConfidenceBound import ConfidenceBound
 
 class BayesianOptimization:
     def __init__(self, env, nr_steps, nr_init=3, acq='ei', nr_weights=6, policy_seed=None):
         self.env = env
@@ -43,7 +44,6 @@ class BayesianOptimization:
         self.best_reward = np.empty((1, 1))
 
     def runner(self, policy):
-        done = False
         env_reward = 0.0
         step_count = 0
@@ -87,7 +87,7 @@ class BayesianOptimization:
         self.GP.fit(self.X, self.Y)
 
     def next_observation(self):
-        if self.acq == 'ei':
+        if self.acq == "Expected Improvement":
             x_next = ExpectedImprovement(self.GP,
                                          self.X,
                                          self.eval_X,
@@ -99,7 +99,7 @@ class BayesianOptimization:
             return x_next
-        elif self.acq == 'pi':
+        elif self.acq == "Probability of Improvement":
             x_next = ProbabilityOfImprovement(self.GP,
                                               self.X,
                                               self.eval_X,
@@ -111,7 +111,7 @@ class BayesianOptimization:
             return x_next
-        elif self.acq == 'cb':
+        elif self.acq == "Upper Confidence Bound":
             x_next = ConfidenceBound(self.GP,
                                      self.X,
                                      self.eval_X,
@@ -137,6 +137,9 @@ class BayesianOptimization:
         self.GP.fit(self.X, self.Y)
 
-        self.best_reward = np.vstack((self.best_reward, max(self.Y)))
+        if self.episode == 0:
+            self.best_reward[0] = max(self.Y)
+        else:
+            self.best_reward = np.vstack((self.best_reward, max(self.Y)))
 
         self.episode += 1
@@ -148,8 +151,6 @@ class BayesianOptimization:
         x_max = self.X[idx, :]
         self.policy_model.weights = x_max
+        best_policy = self.policy_model.rollout().reshape(-1,)
 
-        return self.policy_model.rollout(), y_hat[idx]
+        return best_policy, y_hat[idx], x_max

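The episode check added above fixes the best-reward bookkeeping: self.best_reward starts as np.empty((1, 1)), so unconditionally stacking max(self.Y) onto it leaves an uninitialised value in the first row. A minimal stand-alone sketch of the intended behaviour (names follow the diff; the surrounding class and GP code are omitted):

import numpy as np

best_reward = np.empty((1, 1))          # placeholder row, contents are uninitialised

for episode, current_best in enumerate([0.3, 0.5, 0.4]):
    if episode == 0:
        best_reward[0] = current_best   # overwrite the placeholder in episode 0
    else:
        best_reward = np.vstack((best_reward, current_best))  # append later episodes

print(best_reward.ravel())              # [0.3 0.5 0.4] -- no stale first entry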
View File

@@ -1,4 +1,6 @@
 import numpy as np
 
 class GaussianRBF:
     def __init__(self, nr_weights, nr_steps, seed=None, lowerb=-1.0, upperb=1.0):
         self.nr_weights = nr_weights
@@ -33,11 +35,13 @@ class GaussianRBF:
         return self.policy
 
 def main():
-    policy = GaussianRBFModel(1, 50)
+    policy = GaussianRBF(1, 50)
     policy.random_policy()
-    policy.policy_rollout()
+    policy.rollout()
     print(policy.weights)
 
 if __name__ == "__main__":
     main()

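For readers unfamiliar with the policy class touched above: a Gaussian RBF policy of this kind typically places nr_weights radial basis functions along the nr_steps time axis and returns their weighted sum as the action trajectory. The class body is not part of this diff, so the following is a hedged sketch of the general technique, not the repository's implementation; only the constructor signature and the random_policy()/rollout() names come from the hunks above.

import numpy as np

# Stand-alone sketch (assumed behaviour) of an open-loop Gaussian-RBF policy,
# mirroring the constructor signature shown in the diff.
class GaussianRBFSketch:
    def __init__(self, nr_weights, nr_steps, seed=None, lowerb=-1.0, upperb=1.0):
        self.nr_weights = nr_weights
        self.nr_steps = nr_steps
        self.lowerb = lowerb
        self.upperb = upperb
        self.rng = np.random.default_rng(seed)
        self.weights = np.zeros(nr_weights)
        self.centers = np.linspace(0, nr_steps - 1, nr_weights)  # evenly spaced centres
        self.width = nr_steps / nr_weights                       # shared bandwidth

    def random_policy(self):
        # sample weights uniformly inside the bounds
        self.weights = self.rng.uniform(self.lowerb, self.upperb, self.nr_weights)

    def rollout(self):
        # policy(t) = sum_i w_i * exp(-(t - c_i)^2 / (2 * width^2))
        t = np.arange(self.nr_steps)[:, None]
        phi = np.exp(-(t - self.centers) ** 2 / (2 * self.width ** 2))
        self.policy = phi @ self.weights
        return self.policy

policy = GaussianRBFSketch(6, 50, seed=0)
policy.random_policy()
print(policy.rollout().shape)   # (50,)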
View File

@@ -8,6 +8,7 @@ from active_bo_ros.ReinforcementLearning.ContinuousMountainCar import Continuous
 import numpy as np
 
 class BOService(Node):
     def __init__(self):
         super().__init__('bo_service')
@@ -19,15 +20,18 @@ class BOService(Node):
         self.nr_init = 3
 
     def bo_callback(self, request, response):
+        self.get_logger().info('Bayesian Optimization Service started!')
         nr_weights = request.nr_weights
-        max_steps = request.steps
+        max_steps = request.max_steps
         nr_episodes = request.nr_episodes
         nr_runs = request.nr_runs
         acq = request.acquisition_function
+        self.get_logger().info(acq)
 
         reward = np.zeros((nr_episodes, nr_runs))
-        best_pol_reward = np.zeros((nr_runs, 1))
+        best_pol_reward = np.zeros((1, nr_runs))
         best_policy = np.zeros((max_steps, nr_runs))
+        best_weights = np.zeros((nr_weights, nr_runs))
 
         BO = BayesianOptimization(self.env,
                                   max_steps,
@@ -42,14 +46,16 @@ class BOService(Node):
             x_next = BO.next_observation()
             BO.eval_new_observation(x_next)
 
-            best_policy[:, i], best_pol_reward[:, i] = BO.get_best_result()
+            best_policy[:, i], best_pol_reward[:, i], best_weights[:, i] = BO.get_best_result()
             reward[:, i] = BO.best_reward.T
 
-        response.reward_mean = np.mean(reward, axis=1)
-        response.reward_std = np.std(reward, axis=1)
+        response.reward_mean = np.mean(reward, axis=1).tolist()
+        response.reward_std = np.std(reward, axis=1).tolist()
 
         best_policy_idx = np.argmax(best_pol_reward)
-        response.best_policy = best_policy[:, best_policy_idx]
+        response.best_weights = best_weights[:, best_policy_idx].tolist()
+        response.best_policy = best_policy[:, best_policy_idx].tolist()
 
         return response
@@ -60,5 +66,6 @@ def main(args=None):
     rclpy.spin(bo_service)
 
 if __name__ == '__main__':
     main()
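
As a usage sketch, a client of this service now passes the human-readable acquisition-function names ('Expected Improvement', 'Probability of Improvement', 'Upper Confidence Bound') introduced in BayesianOptimization and can read the new best_weights field from the response. The interface package, service type and service name below (active_bo_msgs, BO, 'bo_srv') are placeholders for illustration; they are not shown in this diff.

# Hypothetical client sketch -- interface package, service type and service name are assumptions.
import rclpy
from rclpy.node import Node
from active_bo_msgs.srv import BO   # placeholder import, adjust to the real interface

def main():
    rclpy.init()
    node = Node('bo_client')
    client = node.create_client(BO, 'bo_srv')
    client.wait_for_service()

    request = BO.Request()
    request.nr_weights = 6
    request.max_steps = 100
    request.nr_episodes = 20
    request.nr_runs = 5
    request.acquisition_function = 'Expected Improvement'   # new string format

    future = client.call_async(request)
    rclpy.spin_until_future_complete(node, future)
    response = future.result()

    print('best weights:', list(response.best_weights))     # field added in this commit
    print('mean reward per episode:', list(response.reward_mean))

    node.destroy_node()
    rclpy.shutdown()

if __name__ == '__main__':
    main()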