diff --git a/src/active_bo_ros/active_bo_ros/BayesianOptimization/BayesianOptimization.py b/src/active_bo_ros/active_bo_ros/BayesianOptimization/BayesianOptimization.py
index 47786ef..080940c 100644
--- a/src/active_bo_ros/active_bo_ros/BayesianOptimization/BayesianOptimization.py
+++ b/src/active_bo_ros/active_bo_ros/BayesianOptimization/BayesianOptimization.py
@@ -71,8 +71,8 @@ class BayesianOptimization:
         self.env.reset(seed=seed)
         self.reset_bo()
 
-        self.X = np.zeros((self.nr_init, self.nr_policy_weights))
-        self.Y = np.zeros((self.nr_init, 1))
+        self.X = np.zeros((self.nr_init, self.nr_policy_weights), dtype=np.float32)
+        self.Y = np.zeros((self.nr_init, 1), dtype=np.float32)
 
         for i in range(self.nr_init):
             self.policy_model.random_policy()
@@ -127,29 +127,29 @@ class BayesianOptimization:
 
         reward, step_count = self.runner(policy, seed=seed)
 
-        self.X = np.vstack((self.X, x_next))
-        self.Y = np.vstack((self.Y, reward))
+        self.X = np.vstack((self.X, x_next), dtype=np.float32)
+        self.Y = np.vstack((self.Y, reward), dtype=np.float32)
 
         self.GP.fit(self.X, self.Y)
 
         if self.episode == 0:
-            self.best_reward[0] = max(self.Y)
+            self.best_reward[0] = np.max(self.Y)
         else:
-            self.best_reward = np.vstack((self.best_reward, max(self.Y)))
+            self.best_reward = np.vstack((self.best_reward, np.max(self.Y)), dtype=np.float32)
 
         self.episode += 1
         return step_count
 
     def add_new_observation(self, reward, x_new):
-        self.X = np.vstack((self.X, x_new))
-        self.Y = np.vstack((self.Y, reward))
+        self.X = np.vstack((self.X, x_new), dtype=np.float32)
+        self.Y = np.vstack((self.Y, reward), dtype=np.float32)
 
         self.GP.fit(self.X, self.Y)
 
         if self.episode == 0:
-            self.best_reward[0] = max(self.Y)
+            self.best_reward[0] = np.max(self.Y)
         else:
-            self.best_reward = np.vstack((self.best_reward, max(self.Y)))
+            self.best_reward = np.vstack((self.best_reward, np.max(self.Y)), dtype=np.float32)
 
         self.episode += 1
 
@@ -161,4 +161,4 @@ class BayesianOptimization:
         self.policy_model.weights = x_max
         best_policy = self.policy_model.rollout().reshape(-1,)
 
-        return best_policy, y_max, x_max
+        return best_policy, y_max, x_max, idx
diff --git a/src/active_bo_ros/active_bo_ros/active_bo_topic.py b/src/active_bo_ros/active_bo_ros/active_bo_topic.py
index b9284ff..6bc3988 100644
--- a/src/active_bo_ros/active_bo_ros/active_bo_topic.py
+++ b/src/active_bo_ros/active_bo_ros/active_bo_topic.py
@@ -57,6 +57,7 @@ class ActiveBOTopic(Node):
         self.current_run = 0
         self.current_episode = 0
         self.seed = None
+        self.seed_array = None
         self.save_result = False
 
         # Active Reinforcement Learning Publisher, Subscriber and Message attributes
@@ -106,6 +107,7 @@
         self.current_run = 0
         self.current_episode = 0
         self.save_result = False
+        self.seed_array = None
 
     def active_bo_callback(self, msg):
         if not self.active_bo_pending:
@@ -121,6 +123,7 @@
            self.bo_acq_fcn = msg.acquisition_function
            self.bo_metric_parameter = msg.metric_parameter
            self.save_result = msg.save_result
+           self.seed_array = np.zeros((1, self.bo_runs))
 
            # initialize
            self.reward = np.zeros((self.bo_episodes, self.bo_runs))
@@ -131,6 +134,7 @@
            # set the seed
            if self.bo_fixed_seed:
                self.seed = int(np.random.randint(1, 2147483647, 1)[0])
+               self.get_logger().info(str(self.seed))
            else:
                self.seed = None
 
@@ -185,6 +189,10 @@
 
                bo_response.best_policy = self.best_policy[:, best_policy_idx].tolist()
                bo_response.best_weights = self.best_weights[:, best_policy_idx].tolist()
+               self.get_logger().info(f'Best Policy: \n{self.best_pol_reward}')
+
+               self.get_logger().info(f'{best_policy_idx}, {int(self.seed_array[0, best_policy_idx])}')
+
                bo_response.reward_mean = np.mean(self.reward, axis=1).tolist()
                bo_response.reward_std = np.std(self.reward, axis=1).tolist()
@@ -217,14 +225,17 @@
                    filename = filename.replace('.', '_') + '.csv'
                    path = os.path.join(file_path, filename)
 
-                   np.savetxt(path, self.reward, delimiter=',')
+                   data = self.reward
+
+                   np.savetxt(path, data, delimiter=',')
 
                active_rl_request = ActiveRL()
 
-               if self.seed is None:
-                   seed = int(np.random.randint(1, 2147483647, 1)[0])
+               if self.bo_fixed_seed:
+                   seed = int(self.seed_array[0, best_policy_idx])
+                   self.get_logger().info(f'Used seed{seed}')
                else:
-                   seed = self.seed
+                   seed = int(np.random.randint(1, 2147483647, 1)[0])
 
                active_rl_request.env = self.bo_env
                active_rl_request.seed = seed
@@ -232,7 +243,6 @@
                active_rl_request.weights = self.best_weights[:, best_policy_idx].tolist()
                active_rl_request.final_run = True
 
-               self.get_logger().info('Calling: Active RL')
                self.active_rl_pub.publish(active_rl_request)
 
                self.get_logger().info('Responding: Active BO')
@@ -269,12 +279,15 @@
                    if user_query.query():
                        active_rl_request = ActiveRL()
 
-                       old_policy, _, old_weights = self.BO.get_best_result()
+                       old_policy, y_max, old_weights, _ = self.BO.get_best_result()
 
-                       if self.seed is None:
-                           seed = int(np.random.randint(1, 2147483647, 1)[0])
-                       else:
+                       self.get_logger().info(f'Best: {y_max}, w:{old_weights}')
+                       self.get_logger().info(f'Size of Y: {self.BO.Y.shape}, Size of X: {self.BO.X.shape}')
+
+                       if self.bo_fixed_seed:
                            seed = self.seed
+                       else:
+                           seed = int(np.random.randint(1, 2147483647, 1)[0])
 
                        active_rl_request.env = self.bo_env
                        active_rl_request.seed = seed
@@ -291,17 +304,23 @@
                        self.BO.eval_new_observation(x_next)
 
                    self.current_episode += 1
-                   self.get_logger().info(f'Current Episode: {self.current_episode}')
+                   # self.get_logger().info(f'Current Episode: {self.current_episode}')
 
                else:
                    self.best_policy[:, self.current_run], \
                        self.best_pol_reward[:, self.current_run], \
-                       self.best_weights[:, self.current_run] = self.BO.get_best_result()
+                       self.best_weights[:, self.current_run], idx = self.BO.get_best_result()
+
+                   self.get_logger().info(f'best idx: {idx}')
 
                    self.reward[:, self.current_run] = self.BO.best_reward.T
                    self.BO = None
                    self.current_episode = 0
+                   if self.bo_fixed_seed:
+                       self.seed_array[0, self.current_run] = self.seed
+                       self.seed = int(np.random.randint(1, 2147483647, 1)[0])
+                       self.get_logger().info(f'{self.seed}')
                    self.current_run += 1
                    self.get_logger().info(f'Current Run: {self.current_run}')
@@ -314,8 +333,8 @@
            state_msg.current_run = self.current_run + 1 if self.current_run < self.bo_runs else self.bo_runs
            state_msg.current_episode = self.current_episode + 1 \
                if self.current_episode < self.bo_episodes else self.bo_episodes
-           state_msg.best_reward = self.best_reward
-           state_msg.last_user_reward = self.rl_reward
+           state_msg.best_reward = float(self.best_reward)
+           state_msg.last_user_reward = float(self.rl_reward)
 
            self.state_pub.publish(state_msg)
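
A minimal sketch (not part of the diff; array shapes are illustrative): the dtype= keyword passed to np.vstack in the hunks above is only accepted by relatively recent NumPy releases, and an explicit astype cast gives the same float32 result on older installations.

import numpy as np

# Hypothetical shapes, mirroring self.X and a new sample x_next.
X = np.zeros((5, 3), dtype=np.float32)
x_next = np.random.rand(3)            # float64 by default

# Equivalent to np.vstack((X, x_next), dtype=np.float32) on NumPy versions
# whose vstack does not take a dtype argument.
X = np.vstack((X, x_next)).astype(np.float32, copy=False)
print(X.shape, X.dtype)               # (6, 3) float32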