diff --git a/src/active_bo_ros/active_bo_ros/BayesianOptimization/BO2D.py b/src/active_bo_ros/active_bo_ros/BayesianOptimization/BO2D.py
index 073ca48..298d206 100644
--- a/src/active_bo_ros/active_bo_ros/BayesianOptimization/BO2D.py
+++ b/src/active_bo_ros/active_bo_ros/BayesianOptimization/BO2D.py
@@ -114,7 +114,7 @@ class BayesianOptimization:
     def get_best_result(self):
         y_max = np.max(self.Y)
         idx = np.argmax(self.Y)
-        x_max = self.X[idx, :]
+        x_max = self.X[idx, :].reshape((self.nr_weights, self.nr_dims), order='F')
 
         return y_max, x_max, idx
 
diff --git a/src/active_bo_ros/active_bo_ros/interactive_bo_2d.py b/src/active_bo_ros/active_bo_ros/interactive_bo_2d.py
index d174edd..f222ecb 100644
--- a/src/active_bo_ros/active_bo_ros/interactive_bo_2d.py
+++ b/src/active_bo_ros/active_bo_ros/interactive_bo_2d.py
@@ -152,12 +152,6 @@ class ActiveBOTopic(Node):
         else:
             self.seed = None
 
-        # set rl environment
-        if self.bo_env == "Reacher":
-            pass
-        else:
-            raise NotImplementedError
-
     def reset_rl_response(self):
         self.rl_weights = None
         self.rl_final_step = None
@@ -222,8 +216,8 @@ class ActiveBOTopic(Node):
             rl_msg = ActiveRLRequest()
             rl_msg.env = self.bo_env
             rl_msg.seed = seed
-            rl_msg.display_run = False
-            rl_msg.interactive_run = 2
+            rl_msg.display_run = True
+            rl_msg.interactive_run = 3
             rl_msg.weights = self.BO.policy_model.random_weights().flatten('F').tolist()
             rl_msg.policy = self.BO.policy_model.rollout().flatten('F').tolist()
             rl_msg.nr_weights = self.bo_nr_weights
@@ -243,12 +237,14 @@ class ActiveBOTopic(Node):
             bo_response.nr_weights = self.bo_nr_weights
             bo_response.nr_steps = self.bo_steps
 
-            bo_response.reward_mean = np.mean(self.reward, axis=1).tolist()
-            bo_response.reward_std = np.std(self.reward, axis=1).tolist()
+            bo_response.reward_mean = np.mean(self.reward, axis=0).tolist()
+            bo_response.reward_std = np.std(self.reward, axis=0).tolist()
 
             if self.save_result:
                 if self.bo_env == "Reacher":
                     env = 're'
+                elif self.bo_env == "Finger":
+                    env = 'fin'
                 else:
                     raise NotImplementedError
 
@@ -278,7 +274,7 @@ class ActiveBOTopic(Node):
             active_rl_request = ActiveRLRequest()
 
             if self.bo_fixed_seed:
-                seed = int(self.seed_array[0, best_policy_idx])
+                seed = int(self.seed_array[best_policy_idx, 0])
             else:
                 seed = int(np.random.randint(1, 2147483647, 1)[0])
 
@@ -386,6 +382,9 @@ class ActiveBOTopic(Node):
                 self.best_pol_reward[self.current_run, :], \
                     self.best_weights[self.current_run, :, :], idx = self.BO.get_best_result()
 
+                self.BO.policy_model.weights = self.best_weights[self.current_run, :, :]
+                self.best_policy[self.current_run, :, :] = self.BO.policy_model.rollout()
+
                 if self.current_run < self.bo_runs - 1:
                     self.BO = None
@@ -393,7 +392,6 @@ class ActiveBOTopic(Node):
                     self.last_query = 0
                     if self.bo_fixed_seed:
                         self.seed_array[self.current_run, 0] = self.seed
-                    else:
                         self.seed = int(np.random.randint(1, 2147483647, 1)[0])
                         # self.get_logger().info(f'{self.seed}')
                 self.current_run += 1
diff --git a/src/active_bo_ros/active_bo_ros/interactive_rl_2d.py b/src/active_bo_ros/active_bo_ros/interactive_rl_2d.py
index a3bd307..6163443 100644
--- a/src/active_bo_ros/active_bo_ros/interactive_rl_2d.py
+++ b/src/active_bo_ros/active_bo_ros/interactive_rl_2d.py
@@ -124,7 +124,6 @@ class ActiveRL(Node):
     def step(self, policy, display_run):
         done = False
-
         action = policy[self.rl_step, :]
         action_clipped = action.clip(min=-1.0, max=1.0)
         output = self.env.step(action_clipped.astype(np.float64))
 
@@ -135,7 +134,7 @@ class ActiveRL(Node):
             self.rl_reward += output.reward * 10
             done = True
         else:
-            self.rl_reward -= 1.0
+            self.rl_reward -= 1.0 + np.linalg.norm(action_clipped) * 0.1
 
         if display_run:
             rgb_array = self.env.physics.render(camera_id=0, height=320, width=480)
@@ -173,10 +172,10 @@ class ActiveRL(Node):
             step_count += 1
 
             if output.reward != 0.0:
-                self.rl_reward += output.reward * 10
+                env_reward += output.reward * 10
                 done = True
             else:
-                self.rl_reward -= 1.0
+                env_reward -= 1.0 + np.linalg.norm(action_clipped) * 0.1
 
             if done:
                 break
@@ -184,7 +183,7 @@ class ActiveRL(Node):
 
     def mainloop_callback(self):
         if self.rl_pending:
-            if self.interactive_run == 0:
+            if self.interactive_run == 0:  # Interactive Mode
                 if not self.best_pol_shown:
                     if not self.policy_sent:
                         self.rl_step = 0
@@ -195,11 +194,18 @@ class ActiveRL(Node):
                             self.env = suite.load('reacher',
                                                   'hard',
                                                   task_kwargs={'random': random_state})
-                            self.rl_spec = self.env.action_spec()
-                            self.env.reset()
+                        elif self.rl_env == "Finger":
+                            np.random.seed(self.rl_seed)
+                            random_state = np.random.RandomState(seed=self.rl_seed)
+                            self.env = suite.load('finger',
+                                                  'turn_easy',
+                                                  task_kwargs={'random': random_state})
                         else:
                             raise NotImplementedError
 
+                        self.rl_spec = self.env.action_spec()
+                        self.env.reset()
+
                         eval_request = ActiveRLEvalRequest()
                         eval_request.policy = self.rl_policy.flatten('F').tolist()
                         eval_request.weights = self.rl_weights
@@ -224,11 +230,18 @@ class ActiveRL(Node):
                             self.env = suite.load('reacher',
                                                   'hard',
                                                   task_kwargs={'random': random_state})
-                            self.rl_spec = self.env.action_spec()
-                            self.env.reset()
+                        elif self.rl_env == "Finger":
+                            np.random.seed(self.rl_seed)
+                            random_state = np.random.RandomState(seed=self.rl_seed)
+                            self.env = suite.load('finger',
+                                                  'turn_easy',
+                                                  task_kwargs={'random': random_state})
                         else:
                             raise NotImplementedError
 
+                        self.rl_spec = self.env.action_spec()
+                        self.env.reset()
+
                 elif self.best_pol_shown:
                     if not self.eval_response_received:
                         pass
@@ -256,7 +269,7 @@ class ActiveRL(Node):
                         self.eval_response_received = False
                         self.rl_pending = False
 
-            elif self.interactive_run == 1:
+            elif self.interactive_run == 1:  # Last Run
                 if not self.policy_sent:
                     self.rl_step = 0
                     self.rl_reward = 0.0
@@ -265,18 +278,26 @@ class ActiveRL(Node):
                         np.random.seed(self.rl_seed)
                         random_state = np.random.RandomState(seed=self.rl_seed)
                         self.env = suite.load('reacher', 'hard', task_kwargs={'random': random_state})
-                        self.rl_spec = self.env.action_spec()
-                        self.env.reset()
+                    elif self.rl_env == "Finger":
+                        np.random.seed(self.rl_seed)
+                        random_state = np.random.RandomState(seed=self.rl_seed)
+                        self.env = suite.load('finger',
+                                              'turn_easy',
+                                              task_kwargs={'random': random_state})
                     else:
                         raise NotImplementedError
 
+                    self.rl_spec = self.env.action_spec()
+                    self.env.reset()
+
                     eval_request = ActiveRLEvalRequest()
                     eval_request.policy = self.rl_policy.flatten('F').tolist()
                     eval_request.weights = self.rl_weights
+                    eval_request.nr_steps = self.nr_steps
+                    eval_request.nr_weights = self.nr_weights
 
                     self.eval_pub.publish(eval_request)
-                    self.get_logger().info('Active RL: Called!')
-                    self.get_logger().info('Active RL: Waiting for Eval!')
+                    self.get_logger().info('Active RL: Display Best Run!')
 
                     self.policy_sent = True
 
@@ -287,16 +308,23 @@ class ActiveRL(Node):
                     self.rl_reward = 0.0
                     self.rl_pending = False
 
-            elif self.interactive_run == 2:
+            elif self.interactive_run == 2:  # Without display
                 if self.rl_env == "Reacher":
                     np.random.seed(self.rl_seed)
                     random_state = np.random.RandomState(seed=self.rl_seed)
                     self.env = suite.load('reacher', 'hard', task_kwargs={'random': random_state})
-                    self.rl_spec = self.env.action_spec()
-                    self.env.reset()
+                elif self.rl_env == "Finger":
+                    np.random.seed(self.rl_seed)
+                    random_state = np.random.RandomState(seed=self.rl_seed)
+                    self.env = suite.load('finger',
+                                          'turn_easy',
+                                          task_kwargs={'random': random_state})
                 else:
                     raise NotImplementedError
 
+                self.rl_spec = self.env.action_spec()
+                self.env.reset()
+
                 env_reward, step_count = self.runner(self.rl_policy)
 
                 rl_response = ActiveRLResponse()
@@ -315,6 +343,66 @@ class ActiveRL(Node):
                     self.reset_rl_request()
                     self.rl_pending = False
 
+            elif self.interactive_run == 3:  # Init run to display the first runs, checking the pose of the arm and target
+                if not self.policy_sent:
+                    self.rl_step = 0
+                    self.rl_reward = 0.0
+
+                    if self.rl_env == "Reacher":
+                        np.random.seed(self.rl_seed)
+                        random_state = np.random.RandomState(seed=self.rl_seed)
+                        self.env = suite.load('reacher', 'hard', task_kwargs={'random': random_state})
+                    elif self.rl_env == "Finger":
+                        np.random.seed(self.rl_seed)
+                        random_state = np.random.RandomState(seed=self.rl_seed)
+                        self.env = suite.load('finger',
+                                              'turn_easy',
+                                              task_kwargs={'random': random_state})
+                    else:
+                        raise NotImplementedError
+
+                    self.rl_spec = self.env.action_spec()
+                    self.env.reset()
+
+                    eval_request = ActiveRLEvalRequest()
+                    eval_request.policy = self.rl_policy.flatten('F').tolist()
+                    eval_request.weights = self.rl_weights
+                    eval_request.nr_steps = self.nr_steps
+                    eval_request.nr_weights = self.nr_weights
+
+                    self.eval_pub.publish(eval_request)
+                    self.get_logger().info('Active RL: Init Display!')
+
+                    self.policy_sent = True
+
+                done = self.step(self.rl_policy, self.display_run)
+
+                if done:
+                    rl_response = ActiveRLResponse()
+                    rl_response.weights = self.rl_weights
+                    rl_response.reward = self.rl_reward
+                    rl_response.final_step = self.rl_step
+                    if self.weight_preference is None:
+                        weight_preference = [False] * len(self.rl_weights)
+                    else:
+                        weight_preference = self.weight_preference
+
+                    rl_response.weight_preference = weight_preference
+
+                    self.active_rl_pub.publish(rl_response)
+
+                    # reset flags and attributes
+                    self.reset_eval_request()
+                    self.reset_rl_request()
+
+                    self.rl_step = 0
+                    self.rl_reward = 0.0
+
+                    self.best_pol_shown = False
+                    self.eval_response_received = False
+                    self.rl_pending = False
+
+
 
 def main(args=None):
     rclpy.init(args=args)
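Note on the order='F' reshape added to get_best_result() in BO2D.py: the node serializes policies and weights with flatten('F') (see rl_msg.weights and rl_msg.policy above), so the flat candidate vectors stored in self.X are column-major, and recovering the (nr_weights, nr_dims) weight matrix must use the same order. A minimal sketch of the round-trip; the 3x2 shape and the values are illustrative, not taken from the repo:

import numpy as np

nr_weights, nr_dims = 3, 2
weights = np.arange(nr_weights * nr_dims).reshape(nr_weights, nr_dims)

# Column-major flatten, as used for rl_msg.weights / rl_msg.policy.
flat = weights.flatten('F')  # -> [0, 2, 4, 1, 3, 5]

# The inverse must also be column-major, matching the get_best_result() fix;
# the default order='C' would scramble weights across dimensions.
restored = flat.reshape((nr_weights, nr_dims), order='F')
assert (restored == weights).all()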
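The inline comments added to mainloop_callback document the four interactive_run request modes. A hypothetical enum summarizing them (the node itself compares raw ints; this is only an illustration of the protocol, not code from the repo):

from enum import IntEnum

class InteractiveRunMode(IntEnum):
    INTERACTIVE = 0   # user may correct the proposed policy via the eval request
    LAST_RUN = 1      # display the best policy found
    NO_DISPLAY = 2    # evaluate a policy without rendering
    INIT_DISPLAY = 3  # initial display run to check the pose of the arm and target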