diff --git a/src/interaction_msgs/action/TaskEvaluation.action b/src/interaction_msgs/action/TaskEvaluation.action
index 4a197de..e6362db 100644
--- a/src/interaction_msgs/action/TaskEvaluation.action
+++ b/src/interaction_msgs/action/TaskEvaluation.action
@@ -7,7 +7,7 @@ uint16 number_of_time_steps
 # case if user_input is true
 float32[] user_parameters # Length: number_of_dimensions * number_of_parameters_per_dimension
 float32[] user_covariance_diag # Length: number_of_dimensions * number_of_parameters_per_dimension
-float32[] current_cma_mean # Length: number_of_dimensions * number_of_parameters_per_dimension
+float32[] current_optimizer_mean # Length: number_of_dimensions * number_of_parameters_per_dimension
 float32[] conditional_points # Length: (number_of_dimensions + time_stamp[0,1]) * number_of_conditional_points
 float32[] weight_parameter # sets the weighted average: 0 = don't trust the user, 1 = completely trust the user (either set by the user or decayed over time; to be determined experimentally)
diff --git a/src/interaction_optimizers/interaction_optimizers/cmaes_optimization_node.py b/src/interaction_optimizers/interaction_optimizers/cmaes_optimization_node.py
index 04d8e26..339d96e 100644
--- a/src/interaction_optimizers/interaction_optimizers/cmaes_optimization_node.py
+++ b/src/interaction_optimizers/interaction_optimizers/cmaes_optimization_node.py
@@ -32,7 +32,7 @@ class CMAESOptimizationNode(Node):
         self.number_of_parameters_per_dimensions = 10
         # the number of weights is double the number of dims * params per dim since it's position and velocity
         self.number_of_weights = 2 * self.number_of_dimensions * self.number_of_parameters_per_dimensions
-        self.user_covariance = np.ones((self.number_of_weights,1)) * self.initial_user_covariance
+        self.user_covariance = np.ones((self.number_of_weights, 1)) * self.initial_user_covariance.value
 
         self.random_seed = None
 
@@ -298,7 +298,7 @@ class CMAESOptimizationNode(Node):
         try:
             response = future.result()
 
-            if self.episode < self.number_of_initial_episodes:
+            if self.episode < self.number_of_initial_episodes.value:
                 self.non_interaction()
 
             if response.interaction:
diff --git a/src/interaction_tasks/interaction_tasks/task_node.py b/src/interaction_tasks/interaction_tasks/task_node.py
index da5c6d5..4b9e836 100644
--- a/src/interaction_tasks/interaction_tasks/task_node.py
+++ b/src/interaction_tasks/interaction_tasks/task_node.py
@@ -21,6 +21,8 @@ class TaskNode(Node):
         # Task Attributes
         self.number_of_processed_trajectories = 0
         self.goal_dict = {}
+        self.eval_strategy = 'robot'
+        self.seed = self.declare_parameter('seed', None)
 
         # ROS2 Interfaces
         self.task_callback_group = ReentrantCallbackGroup()
@@ -70,12 +72,85 @@
         self._task_action.destroy()
         super().destroy_node()
 
+    # Helper functions
+    def compute_weighted_mean(self, user_parameters, optimizer_mean, weight_parameter):
+        # Check that the weight parameter is between 0 and 1
+        if not 0 <= weight_parameter <= 1:
+            self.get_logger().error(f'Invalid weight parameter for weighted average: {weight_parameter}')
+            self.error_trigger()
+
+        # Compute the weighted average of the user parameters and the optimizer mean
+        weighted_mean = (weight_parameter * np.asarray(user_parameters)) + ((1 - weight_parameter) * np.asarray(optimizer_mean))
+        return weighted_mean
+
+    def compute_promp_trajectory(self, parameter_set,
+                                 number_of_dimensions, number_of_parameters_per_dimensions,
+                                 duration, number_of_time_steps):
+        time = np.linspace(0, duration, number_of_time_steps)
+        # number_of_dimensions counts position and velocity, so the ProMP gets half of it
+        promp = ProMP(number_of_dimensions // 2,
+                      n_weights_per_dim=number_of_parameters_per_dimensions)
+        trajectory = promp.trajectory_from_weights(time, parameter_set)
+
+        return trajectory
+
+    # State Methods
     def on_enter_processing_non_interactive_input(self):
-        pass
+        # Unflatten the parameter array to get the individual parameter sets
+        parameter_sets = unflatten_population(self._goal.parameter_array,
+                                              self._goal.number_of_population,
+                                              self._goal.number_of_dimensions,
+                                              self._goal.number_of_parameters_per_dimensions)
+        # Compute a trajectory for each parameter set
+        trajectories = [self.compute_promp_trajectory(parameter_sets[i, :, :],
+                                                      self._goal.number_of_dimensions,
+                                                      self._goal.number_of_parameters_per_dimensions,
+                                                      self._goal.duration,
+                                                      self._goal.number_of_time_steps
+                                                      ) for i in range(parameter_sets.shape[0])]
+
+        # Send the trajectories to the robot or the objective function
+        if self.eval_strategy == 'robot':
+            # TODO: Implement Action Interface for Robot Evaluation
+            self.non_interactive_to_robot()
+
+        elif self.eval_strategy == 'obj_fun':
+            # TODO: Implement Action Interface for Objective Function Evaluation
+            self.non_interactive_to_obj_fun()
+
+        else:
+            self.get_logger().error(f"Unknown evaluation strategy: '{self.eval_strategy}'")
+            self.error_trigger()
 
     def on_enter_processing_interactive_input(self):
-        pass
+        # Blend the user parameters with the current optimizer mean
+        weighted_average = self.compute_weighted_mean(self._goal.user_parameters,
+                                                      self._goal.current_optimizer_mean,
+                                                      self._goal.weight_parameter)
+
+        # number_of_dimensions counts position and velocity, so the ProMP gets half of it
+        promp = ProMP(self._goal.number_of_dimensions // 2,
+                      n_weights_per_dim=self._goal.number_of_parameters_per_dimensions)
+
+        # Expand the diagonal entries into a full covariance matrix for the weight distribution
+        promp.from_weight_distribution(weighted_average,
+                                       np.diag(self._goal.user_covariance_diag))
+
+        if len(self._goal.conditional_points) > 0:
+            # TODO: Fix the Action to add conditional cov
+            pass
+
+        time = np.linspace(0, self._goal.duration, self._goal.number_of_time_steps)
+        random_state = np.random.RandomState(self.seed.value)
+        trajectories = promp.sample_trajectories(time, self._goal.number_of_population, random_state)
+
+        # Send the trajectories to the robot or the objective function
+        if self.eval_strategy == 'robot':
+            # TODO: Implement Action Interface for Robot Evaluation
+            self.interactive_to_robot()
+
+        elif self.eval_strategy == 'obj_fun':
+            # TODO: Implement Action Interface for Objective Function Evaluation
+            self.interactive_to_obj_fun()
+
+        else:
+            self.get_logger().error(f"Unknown evaluation strategy: '{self.eval_strategy}'")
+            self.error_trigger()
 
     def on_enter_waiting_for_robot_response(self):
         pass
@@ -83,7 +158,6 @@
     def on_enter_waiting_for_objective_function_response(self):
         pass
 
-    # Callback functions
     def _task_goal_callback(self, goal):
         self._goal = goal
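
Illustrative note (not part of the diff): the interactive branch above blends the user's demonstrated weights with the optimizer's current mean and then samples candidate weight vectors around that blend. Below is a minimal, self-contained sketch of that step using plain NumPy only; the ProMP sampling is stood in by a direct Gaussian draw, and all names and sizes are made up for illustration.

import numpy as np

# Made-up problem size: 2 dimensions with 10 weights per dimension.
number_of_weights = 20

# Stand-ins for the goal fields: user demonstration, current optimizer mean,
# and the diagonal of the user covariance.
user_parameters = np.linspace(0.0, 1.0, number_of_weights)
current_optimizer_mean = np.zeros(number_of_weights)
user_covariance_diag = np.full(number_of_weights, 0.05)

# weight_parameter = 0 trusts only the optimizer, 1 trusts only the user.
weight_parameter = 0.3
weighted_mean = (weight_parameter * user_parameters
                 + (1 - weight_parameter) * current_optimizer_mean)

# Mirror the interactive branch: draw one candidate weight vector per population member
# around the blended mean, using the diagonal covariance and a fixed seed.
random_state = np.random.RandomState(42)
number_of_population = 5
samples = random_state.multivariate_normal(weighted_mean,
                                           np.diag(user_covariance_diag),
                                           size=number_of_population)
print(samples.shape)  # (5, 20)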