From ef6df077d7f01e5c5337cd33932adcc28e48485c Mon Sep 17 00:00:00 2001
From: Niko
Date: Fri, 15 Sep 2023 17:36:40 +0200
Subject: [PATCH] Franka experiment complete + debugged

---
 .../GaussianModelMultiDim.py                  |  65 ++++
 franka_iml_experiment/iml_experiment_node.py  |  53 +++-
 franka_iml_experiment/iml_experiment_nodmp.py | 279 ++++++++++++++++++
 setup.py                                      |   2 +-
 4 files changed, 382 insertions(+), 17 deletions(-)
 create mode 100644 franka_iml_experiment/RepresentationModel/GaussianModelMultiDim.py
 create mode 100644 franka_iml_experiment/iml_experiment_nodmp.py

diff --git a/franka_iml_experiment/RepresentationModel/GaussianModelMultiDim.py b/franka_iml_experiment/RepresentationModel/GaussianModelMultiDim.py
new file mode 100644
index 0000000..15ba4f7
--- /dev/null
+++ b/franka_iml_experiment/RepresentationModel/GaussianModelMultiDim.py
@@ -0,0 +1,65 @@
+
+import numpy as np
+
+
+class GaussianRBF:
+    def __init__(self, nr_steps, nr_weights, nr_dims, lower_bound=None, upper_bound=None, seed=None, fixed_dims=None):
+        self.nr_weights = nr_weights
+        self.nr_steps = nr_steps
+        self.nr_dims = nr_dims
+
+        self.weights = None
+        self.trajectory = None
+
+        if lower_bound is None:
+            self.lower_bound = [-1.]*nr_dims
+        else:
+            self.lower_bound = lower_bound
+
+        if upper_bound is None:
+            self.upper_bound = [1.]*nr_dims
+        else:
+            self.upper_bound = upper_bound
+
+        self.rng = np.random.default_rng(seed=seed)
+
+        self.fixed_dims = fixed_dims
+
+        # initialize
+        self.mid_points = np.linspace(0, self.nr_steps, self.nr_weights)
+        if nr_weights > 1:
+            self.std = self.mid_points[1] / (2 * np.sqrt(2 * np.log(2)))  # Full width at half maximum
+        else:
+            self.std = self.nr_steps / 2
+
+        self.reset()
+
+    def reset(self):
+        self.weights = np.zeros((self.nr_weights, self.nr_dims))
+        self.trajectory = np.zeros((self.nr_steps, self.nr_dims))
+
+    def random_weights(self):
+        for w in range(self.nr_weights):
+            # If this weight index is listed in fixed_dims, set its weights directly
+            if self.fixed_dims is not None and w in self.fixed_dims:
+                self.weights[w, :] = np.array(self.fixed_dims[w])
+            else:
+                for dim in range(self.nr_dims):
+                    self.weights[w, dim] = self.rng.uniform(self.lower_bound[dim], self.upper_bound[dim], 1)
+        return self.weights
+
+    def rollout(self):
+        self.trajectory = np.zeros((self.nr_steps, self.nr_dims))
+        for step in range(self.nr_steps):
+            for weight in range(self.nr_weights):
+                base_fun = np.exp(-0.5 * (step - self.mid_points[weight]) ** 2 / self.std ** 2)
+                for dim in range(self.nr_dims):
+                    self.trajectory[step, dim] += base_fun * self.weights[weight, dim]
+
+        return self.trajectory
+
+    def set_weights(self, x):
+        self.weights = x.reshape((self.nr_weights, self.nr_dims), order='F')
+
+    def get_x(self):
+        return self.weights.flatten('F')
diff --git a/franka_iml_experiment/iml_experiment_node.py b/franka_iml_experiment/iml_experiment_node.py
index 4779792..bad8961 100644
--- a/franka_iml_experiment/iml_experiment_node.py
+++ b/franka_iml_experiment/iml_experiment_node.py
@@ -10,8 +10,9 @@ from active_bo_msgs.msg import ActiveRLResponse
 from active_bo_msgs.msg import ActiveRLEvalRequest
 from active_bo_msgs.msg import ActiveRLEvalResponse
 
-from franka_iml_experiment.RepresentationModel.DMP import *
-
+# from franka_iml_experiment.RepresentationModel.DMP import DmpDiscrete
+import pydmps
+import pydmps.dmp_discrete
 import numpy as np
 
 
@@ -29,7 +30,7 @@ class IML_Experiment(Node):
                                               callback_group=rl_callback_group)
 
         self.exec_pub = self.create_publisher(Empty,
-                                              'moveit_interface/execute',
+                                              'moveit_interface/execution',
                                               10)
 
         self.rl_subscription = self.create_subscription(DMP,
@@ -58,8 +59,8 @@ class IML_Experiment(Node):
         self.eval_pending = False
 
         # Reward constants
-        self.distance_pen = -0.1
-        self.accel_pen = -0.1
+        self.distance_pen = -0.01
+        self.accel_pen = -0.01
         self.time_pen = -1.
         self.zone_pen = -20.
         self.goal_rew = 10.
@@ -71,25 +72,29 @@ class IML_Experiment(Node):
 
         # BO parameters
         self.start = np.array([0.5, -0.3])
-        self.end = np.array([0.5, 0.3])
-        self.time = 10
+        self.end = np.array([0.51, 0.3])
+        self.time = 5.
         self.nr_dim = 2
         self.nr_bfs = None
-        self.nr_steps = None
+        self.nr_steps = 20
         self.weight_preference = None
-        self.weight_scaling = 1000
+        self.weight_scaling = 100
+        self.dt = 1/(self.time * self.nr_steps)
+
+        # Dmp
+        # self.dmp = DmpDiscrete(nr_dmps=self.nr_dim, nr_bfs=self.nr_bfs, dt=self.dt, tau=self.time)
+        # self.dmp.cs.roll_out()
 
     def dmp_callback(self, msg):
-        start = self.start
-        end = np.array([0.5, -0.3])
         self.nr_bfs = msg.nr_bfs
         self.nr_steps = msg.nr_steps
         interactive_run = msg.interactive_run
 
         weights = np.vstack((msg.p_x, msg.p_y)) * self.weight_scaling
-        weights_zeros = np.zeros_like(weights)
+        self.get_logger().info(f'{weights}')
 
-        dmp = pydmps.dmp_discrete.DMPs_discrete(n_dmps=self.nr_dim, n_bfs=self.nr_bfs, w=weights_zeros, y0=start, goal=end)
+        dmp = pydmps.dmp_discrete.DMPs_discrete(n_dmps=self.nr_dim, n_bfs=self.nr_bfs, w=weights,
+                                                y0=self.start, goal=self.end)
         y_track, _, ddy_track = dmp.rollout()
 
         if interactive_run != 0:
@@ -125,6 +130,7 @@ class IML_Experiment(Node):
             rl_response.weight_preference = weight_preference
 
             self.active_rl_pub.publish(rl_response)
+            self.get_logger().info(f'Before:{msg.p_x + msg.p_y}')
 
         else:
             self.get_logger().info('Interactive Learning started!')
@@ -137,17 +143,32 @@ class IML_Experiment(Node):
             self.eval_pub.publish(eval_msg)
             self.eval_pending = True
 
+            pose_msg = PoseArray()
+            for i in range(y_track.shape[0]):
+                pose = Pose()
+                pose.position.x = y_track[i, 0]
+                pose.position.y = y_track[i, 1]
+                pose.position.z = 0.15
+                pose.orientation.x = 1.
+                pose.orientation.y = 0.
+                pose.orientation.z = 0.
+                pose.orientation.w = 0.
+                pose_msg.poses.append(pose)
+
+            self.traj_pub.publish(pose_msg)
+
     def rl_eval_callback(self, msg):
         if self.eval_pending:
             self.eval_pending = False
             eval_weights = np.array(msg.weights).reshape((self.nr_bfs, self.nr_dim), order='F').T * self.weight_scaling
 
             self.weight_preference = msg.weight_preference
-            dmp = pydmps.dmp_discrete.DMPs_discrete(n_dmps=self.nr_dim, n_bfs=self.nr_bfs, w=eval_weights, y0=self.start,
-                                                    goal=self.end)
+            dmp = pydmps.dmp_discrete.DMPs_discrete(n_dmps=self.nr_dim, n_bfs=self.nr_bfs, w=eval_weights,
+                                                    y0=self.start, goal=self.end)
             y_track, _, ddy_track = dmp.rollout()
 
             self.get_logger().info('Active RL Eval: Responded!')
+            self.get_logger().info(f'After{msg.weights}')
 
             reward, not_perform = self.compute_reward(y_track, ddy_track, self.nr_steps)
             self.get_logger().info(f'{reward}')
@@ -197,7 +218,6 @@ class IML_Experiment(Node):
         # 2. Check if any (x,y) position is within the circle
         t = 0
         for i in range(y_track.shape[0]):
-            self.get_logger().info(f'{y_track[i, :]}')
             distance_to_circle_center = np.linalg.norm(y_track[i, :] - self.end)
             if distance_to_circle_center < self.circle_radius:
                 t = i
@@ -207,6 +227,7 @@ class IML_Experiment(Node):
 
         y_track = y_track[:t, :]
         ddy_track = ddy_track[:t, :]
+        self.get_logger().info(f'{y_track.shape}')
 
         penalty = t * self.time_pen
 
diff --git a/franka_iml_experiment/iml_experiment_nodmp.py b/franka_iml_experiment/iml_experiment_nodmp.py
new file mode 100644
index 0000000..a73b572
--- /dev/null
+++ b/franka_iml_experiment/iml_experiment_nodmp.py
@@ -0,0 +1,279 @@
+import rclpy
+from rclpy.node import Node
+from rclpy.callback_groups import ReentrantCallbackGroup
+
+from std_msgs.msg import Empty
+from geometry_msgs.msg import PoseArray
+from geometry_msgs.msg import Pose
+from active_bo_msgs.msg import ActiveRLRequest
+from active_bo_msgs.msg import ActiveRLResponse
+from active_bo_msgs.msg import ActiveRLEvalRequest
+from active_bo_msgs.msg import ActiveRLEvalResponse
+
+from franka_iml_experiment.RepresentationModel.GaussianModelMultiDim import GaussianRBF
+
+import numpy as np
+
+
+class IML_Experiment(Node):
+
+    def __init__(self):
+        super().__init__('iml_node')
+
+        rl_callback_group = ReentrantCallbackGroup()
+        topic_callback_group = ReentrantCallbackGroup()
+
+        self.traj_pub = self.create_publisher(PoseArray,
+                                              'moveit_interface/task_space_trajectory',
+                                              10,
+                                              callback_group=rl_callback_group)
+
+        self.exec_pub = self.create_publisher(Empty,
+                                              'moveit_interface/execution',
+                                              10)
+
+        self.rl_subscription = self.create_subscription(ActiveRLRequest,
+                                                        'active_rl_request',
+                                                        self.rl_callback,
+                                                        10,
+                                                        callback_group=rl_callback_group)
+
+        self.active_rl_pub = self.create_publisher(ActiveRLResponse,
+                                                   'active_rl_response',
+                                                   1,
+                                                   callback_group=rl_callback_group)
+
+        # interactive part
+        self.eval_pub = self.create_publisher(ActiveRLEvalRequest,
+                                              'active_rl_eval_request',
+                                              1,
+                                              callback_group=topic_callback_group)
+        self.eval_sub = self.create_subscription(ActiveRLEvalResponse,
+                                                 'active_rl_eval_response',
+                                                 self.rl_eval_callback,
+                                                 1,
+                                                 callback_group=topic_callback_group)
+
+        # States
+        self.eval_pending = False
+
+        # Reward constants
+        self.distance_pen = -0.01
+        self.accel_pen = -0.01
+        self.time_pen = -1.
+        self.zone_pen = -20.
+        self.goal_rew = 10.
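+        # Of these, compute_reward below uses zone_pen, time_pen and distance_pen;
+        # accel_pen and goal_rew are currently unused in this node.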
+
+        # Scene parameters
+        self.square_center = np.array([0.5, 0.0])
+        self.square_half_side = 0.1  # half of the side length
+        self.circle_radius = 0.02
+
+        # BO parameters
+        self.start = {0: [0.5, -0.3]}
+        self.end = [0.5, 0.3]
+        self.nr_dims = None
+        self.nr_weights = None
+        self.nr_steps = None
+        self.weight_preference = None
+        self.weights = None
+        self.policy = None
+
+        # Policy parameter
+        self.policy_model = None
+        self.lower_bound = -1.0
+        self.upper_bound = 1.0
+        self.seed = None
+
+    def rl_callback(self, msg):
+        interactive_run = msg.interactive_run
+
+        self.nr_weights = msg.nr_weights
+        self.nr_steps = msg.nr_steps
+        self.nr_dims = msg.nr_dims
+
+        weight_dims = (self.nr_weights, self.nr_dims)
+
+        self.weights = np.array(msg.weights, dtype=np.float64).reshape(weight_dims, order='F')
+
+        self.policy_model = GaussianRBF(self.nr_steps, self.nr_weights, self.nr_dims,
+                                        lower_bound=self.lower_bound, upper_bound=self.upper_bound, seed=self.seed,
+                                        fixed_dims=self.start)
+
+        self.policy_model.set_weights(np.around(self.weights, decimals=8))
+        self.policy = self.policy_model.rollout()
+
+        reward, not_perform, terminate_time = self.compute_reward(self.policy, self.nr_steps)
+
+        short_policy = self.policy[:terminate_time, :]
+
+        if interactive_run == 2:
+            if not not_perform:
+                pose_msg = PoseArray()
+                for i in range(short_policy.shape[0]):
+                    pose = Pose()
+                    pose.position.x = short_policy[i, 0]
+                    pose.position.y = short_policy[i, 1]
+                    pose.position.z = 0.15
+                    pose.orientation.x = 1.
+                    pose.orientation.y = 0.
+                    pose.orientation.z = 0.
+                    pose.orientation.w = 0.
+                    pose_msg.poses.append(pose)
+
+                self.traj_pub.publish(pose_msg)
+            else:
+                self.get_logger().info(f'Trajectory is not valid!')
+
+        if interactive_run == 1:
+            if not not_perform:
+                pose_msg = PoseArray()
+                for i in range(short_policy.shape[0]):
+                    pose = Pose()
+                    pose.position.x = short_policy[i, 0]
+                    pose.position.y = short_policy[i, 1]
+                    pose.position.z = 0.15
+                    pose.orientation.x = 1.
+                    pose.orientation.y = 0.
+                    pose.orientation.z = 0.
+                    pose.orientation.w = 0.
+                    pose_msg.poses.append(pose)
+
+                # self.traj_pub.publish(pose_msg)
+                # self.exec_pub.publish(Empty())
+
+            rl_response = ActiveRLResponse()
+            rl_response.weights = msg.weights
+            rl_response.reward = reward
+            rl_response.final_step = self.nr_steps
+            if self.weight_preference is None:
+                weight_preference = [False] * self.nr_weights * 2
+            else:
+                weight_preference = self.weight_preference
+
+            rl_response.weight_preference = weight_preference
+
+            self.active_rl_pub.publish(rl_response)
+
+        else:
+            self.get_logger().info('Interactive Learning started!')
+            eval_msg = ActiveRLEvalRequest()
+            eval_msg.weights = self.weights.flatten('F').tolist()
+            eval_msg.policy = self.policy_model.rollout().flatten('F').tolist()
+            eval_msg.nr_steps = self.nr_steps
+            eval_msg.nr_weights = self.nr_weights
+
+            self.eval_pub.publish(eval_msg)
+            self.eval_pending = True
+
+            pose_msg = PoseArray()
+            for i in range(short_policy.shape[0]):
+                pose = Pose()
+                pose.position.x = short_policy[i, 0]
+                pose.position.y = short_policy[i, 1]
+                pose.position.z = 0.15
+                pose.orientation.x = 1.
+                pose.orientation.y = 0.
+                pose.orientation.z = 0.
+                pose.orientation.w = 0.
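+                # Orientation (x=1, y=0, z=0, w=0) is a 180 degree rotation about
+                # the x-axis, i.e. the end effector presumably points straight down.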
+                pose_msg.poses.append(pose)
+
+            self.traj_pub.publish(pose_msg)
+
+    def rl_eval_callback(self, msg):
+        if self.eval_pending:
+            self.eval_pending = False
+            weight_dims = (self.nr_weights, self.nr_dims)
+            self.weights = np.array(msg.weights, dtype=np.float64).reshape(weight_dims, order='F')
+
+            self.policy_model.set_weights(np.around(self.weights, decimals=8))
+            self.policy = self.policy_model.rollout()
+
+            self.weight_preference = msg.weight_preference
+
+            self.get_logger().info('Active RL Eval: Responded!')
+
+            reward, not_perform, terminate_time = self.compute_reward(self.policy, self.nr_steps)
+            self.get_logger().info(f'Current Reward: {reward}')
+
+            short_policy = self.policy[:terminate_time, :]
+
+            if not not_perform:
+                pose_msg = PoseArray()
+                for i in range(short_policy.shape[0]):
+                    pose = Pose()
+                    pose.position.x = short_policy[i, 0]
+                    pose.position.y = short_policy[i, 1]
+                    pose.position.z = 0.15
+                    pose.orientation.x = 1.
+                    pose.orientation.y = 0.
+                    pose.orientation.z = 0.
+                    pose.orientation.w = 0.
+                    pose_msg.poses.append(pose)
+
+                self.traj_pub.publish(pose_msg)
+                # self.exec_pub.publish(Empty())
+
+            rl_response = ActiveRLResponse()
+            rl_response.weights = msg.weights
+            rl_response.reward = reward
+            rl_response.final_step = self.nr_steps
+            if self.weight_preference is None:
+                weight_preference = [False] * self.nr_weights * 2
+            else:
+                weight_preference = self.weight_preference
+
+            rl_response.weight_preference = weight_preference
+
+            self.active_rl_pub.publish(rl_response)
+
+    def compute_reward(self, y_track, time):
+        # 1. Check if any (x,y) position is within the square
+        within_square = np.any(
+            (y_track[:, 0] > self.square_center[0] - self.square_half_side) &
+            (y_track[:, 0] < self.square_center[0] + self.square_half_side) &
+            (y_track[:, 1] > self.square_center[1] - self.square_half_side) &
+            (y_track[:, 1] < self.square_center[1] + self.square_half_side)
+        )
+
+        # If the robot moves into the forbidden zone it won't perform trajectory and get penalized
+        if within_square:
+            return self.zone_pen + time * self.time_pen, within_square, 0
+
+        # 2. Check if any (x,y) position is within the circle
+        t = 0
+        for i in range(y_track.shape[0]):
+            distance_to_circle_center = np.linalg.norm(y_track[i, :] - self.end)
+            if distance_to_circle_center < self.circle_radius:
+                t = i
+                break
+
+        if t == 0:
+            t = y_track.shape[0]
+
+        y_track = y_track[:t, :]
+
+        penalty = t * self.time_pen
+
+        # 3. Compute the Euclidean distance from each (x,y) position to the center of the square
+        distances_to_square_center = np.linalg.norm(y_track - self.square_center, axis=1)
+        total_distance_norm = np.sum(distances_to_square_center)
+
+        penalty += total_distance_norm * self.distance_pen
+
+        return penalty, within_square, t
+
+
+def main(args=None):
+    rclpy.init(args=args)
+
+    dmp_node = IML_Experiment()
+
+    rclpy.spin(dmp_node)
+
+    dmp_node.destroy_node()
+    rclpy.shutdown()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/setup.py b/setup.py
index 320b381..5b89959 100644
--- a/setup.py
+++ b/setup.py
@@ -28,7 +28,7 @@ setup(
             'dmp_node = franka_iml_experiment.dmp_node:main',
             'dmp_test = franka_iml_experiment.dmp_test:main',
             'move_square = franka_iml_experiment.move_square:main',
-            'iml_experiment = franka_iml_experiment.iml_experiment_node:main',
+            'iml_experiment = franka_iml_experiment.iml_experiment_nodmp:main',
         ],
     },
 )
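
For reference, a minimal usage sketch of the new GaussianRBF policy model, mirroring the calls made in rl_callback of iml_experiment_nodmp.py; the step/weight counts and weight values below are illustrative only, not taken from the experiment:

    import numpy as np
    from franka_iml_experiment.RepresentationModel.GaussianModelMultiDim import GaussianRBF

    # 2-D policy, 5 RBF weights per dimension, 20 time steps; fixed_dims mirrors
    # how rl_callback passes self.start (it only affects random_weights()).
    model = GaussianRBF(nr_steps=20, nr_weights=5, nr_dims=2,
                        lower_bound=[-1., -1.], upper_bound=[1., 1.],
                        seed=0, fixed_dims={0: [0.5, -0.3]})

    flat_weights = np.zeros(5 * 2)    # column-major: all x-weights, then all y-weights
    model.set_weights(flat_weights)   # reshaped to (nr_weights, nr_dims) with order='F'
    trajectory = model.rollout()      # (nr_steps, nr_dims) array of x/y way-points
    print(trajectory.shape)           # -> (20, 2)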