Franka experiment complete + debugged

Niko Feith 2023-09-15 17:36:40 +02:00
parent a367ee2d98
commit ef6df077d7
4 changed files with 382 additions and 17 deletions

View File

@@ -0,0 +1,65 @@
import numpy as np


class GaussianRBF:
    def __init__(self, nr_steps, nr_weights, nr_dims, lower_bound=None, upper_bound=None, seed=None, fixed_dims=None):
        self.nr_weights = nr_weights
        self.nr_steps = nr_steps
        self.nr_dims = nr_dims
        self.weights = None
        self.trajectory = None

        if lower_bound is None:
            self.lower_bound = [-1.] * nr_dims
        else:
            self.lower_bound = lower_bound

        if upper_bound is None:
            self.upper_bound = [1.] * nr_dims
        else:
            self.upper_bound = upper_bound

        self.rng = np.random.default_rng(seed=seed)
        # Optional mapping from weight index to a fixed weight vector
        self.fixed_dims = fixed_dims

        # initialize basis function centers and width
        self.mid_points = np.linspace(0, self.nr_steps, self.nr_weights)
        if nr_weights > 1:
            self.std = self.mid_points[1] / (2 * np.sqrt(2 * np.log(2)))  # Full width at half maximum
        else:
            self.std = self.nr_steps / 2

        self.reset()

    def reset(self):
        self.weights = np.zeros((self.nr_weights, self.nr_dims))
        self.trajectory = np.zeros((self.nr_steps, self.nr_dims))

    def random_weights(self):
        for w in range(self.nr_weights):
            # If this weight index exists in fixed_dims, set its weight vector directly
            if self.fixed_dims is not None and w in self.fixed_dims:
                self.weights[w, :] = np.array(self.fixed_dims[w])
            else:
                for dim in range(self.nr_dims):
                    self.weights[w, dim] = self.rng.uniform(self.lower_bound[dim], self.upper_bound[dim])
        return self.weights

    def rollout(self):
        self.trajectory = np.zeros((self.nr_steps, self.nr_dims))
        for step in range(self.nr_steps):
            for weight in range(self.nr_weights):
                base_fun = np.exp(-0.5 * (step - self.mid_points[weight]) ** 2 / self.std ** 2)
                for dim in range(self.nr_dims):
                    self.trajectory[step, dim] += base_fun * self.weights[weight, dim]
        return self.trajectory

    def set_weights(self, x):
        self.weights = x.reshape((self.nr_weights, self.nr_dims), order='F')

    def get_x(self):
        return self.weights.flatten('F')
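
For quick reference, a usage sketch of this class (not part of the commit; the parameter values are illustrative): a 2-D policy with 5 basis functions over 20 steps, with weight index 0 pinned to the start pose, mirroring how the experiment node constructs it.

from franka_iml_experiment.RepresentationModel.GaussianModelMultiDim import GaussianRBF

model = GaussianRBF(nr_steps=20, nr_weights=5, nr_dims=2, seed=0, fixed_dims={0: [0.5, -0.3]})
weights = model.random_weights()   # (5, 2) array; row 0 is fixed to [0.5, -0.3]
trajectory = model.rollout()       # (20, 2) array: per-step weighted sum of the Gaussian basis functions
x = model.get_x()                  # weights flattened in 'F' order, e.g. as a query point for the optimizer
model.set_weights(x)               # inverse of get_x()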

View File

@@ -10,8 +10,9 @@ from active_bo_msgs.msg import ActiveRLResponse
 from active_bo_msgs.msg import ActiveRLEvalRequest
 from active_bo_msgs.msg import ActiveRLEvalResponse
-from franka_iml_experiment.RepresentationModel.DMP import *
+# from franka_iml_experiment.RepresentationModel.DMP import DmpDiscrete
+import pydmps
+import pydmps.dmp_discrete
 import numpy as np
@@ -29,7 +30,7 @@ class IML_Experiment(Node):
                                               callback_group=rl_callback_group)
         self.exec_pub = self.create_publisher(Empty,
-                                              'moveit_interface/execute',
+                                              'moveit_interface/execution',
                                               10)
         self.rl_subscription = self.create_subscription(DMP,
@@ -58,8 +59,8 @@ class IML_Experiment(Node):
         self.eval_pending = False
         # Reward constants
-        self.distance_pen = -0.1
-        self.accel_pen = -0.1
+        self.distance_pen = -0.01
+        self.accel_pen = -0.01
         self.time_pen = -1.
         self.zone_pen = -20.
         self.goal_rew = 10.
@@ -71,25 +72,29 @@ class IML_Experiment(Node):
         # BO parameters
         self.start = np.array([0.5, -0.3])
-        self.end = np.array([0.5, 0.3])
-        self.time = 10
+        self.end = np.array([0.51, 0.3])
+        self.time = 5.
         self.nr_dim = 2
         self.nr_bfs = None
-        self.nr_steps = None
+        self.nr_steps = 20
         self.weight_preference = None
-        self.weight_scaling = 1000
+        self.weight_scaling = 100
+        self.dt = 1/(self.time * self.nr_steps)
+        # Dmp
+        # self.dmp = DmpDiscrete(nr_dmps=self.nr_dim, nr_bfs=self.nr_bfs, dt=self.dt, tau=self.time)
+        # self.dmp.cs.roll_out()

     def dmp_callback(self, msg):
-        start = self.start
-        end = np.array([0.5, -0.3])
         self.nr_bfs = msg.nr_bfs
         self.nr_steps = msg.nr_steps
         interactive_run = msg.interactive_run
         weights = np.vstack((msg.p_x, msg.p_y)) * self.weight_scaling
-        weights_zeros = np.zeros_like(weights)
-        dmp = pydmps.dmp_discrete.DMPs_discrete(n_dmps=self.nr_dim, n_bfs=self.nr_bfs, w=weights_zeros, y0=start, goal=end)
+        self.get_logger().info(f'{weights}')
+        dmp = pydmps.dmp_discrete.DMPs_discrete(n_dmps=self.nr_dim, n_bfs=self.nr_bfs, w=weights,
+                                                y0=self.start, goal=self.end)
         y_track, _, ddy_track = dmp.rollout()
         if interactive_run != 0:
@@ -125,6 +130,7 @@ class IML_Experiment(Node):
             rl_response.weight_preference = weight_preference
             self.active_rl_pub.publish(rl_response)
+            self.get_logger().info(f'Before:{msg.p_x + msg.p_y}')
         else:
             self.get_logger().info('Interactive Learning started!')
@@ -137,17 +143,32 @@ class IML_Experiment(Node):
             self.eval_pub.publish(eval_msg)
             self.eval_pending = True
+
+            pose_msg = PoseArray()
+            for i in range(y_track.shape[0]):
+                pose = Pose()
+                pose.position.x = y_track[i, 0]
+                pose.position.y = y_track[i, 1]
+                pose.position.z = 0.15
+                pose.orientation.x = 1.
+                pose.orientation.y = 0.
+                pose.orientation.z = 0.
+                pose.orientation.w = 0.
+                pose_msg.poses.append(pose)
+
+            self.traj_pub.publish(pose_msg)

     def rl_eval_callback(self, msg):
         if self.eval_pending:
             self.eval_pending = False
             eval_weights = np.array(msg.weights).reshape((self.nr_bfs, self.nr_dim), order='F').T * self.weight_scaling
             self.weight_preference = msg.weight_preference
-            dmp = pydmps.dmp_discrete.DMPs_discrete(n_dmps=self.nr_dim, n_bfs=self.nr_bfs, w=eval_weights, y0=self.start,
-                                                    goal=self.end)
+            dmp = pydmps.dmp_discrete.DMPs_discrete(n_dmps=self.nr_dim, n_bfs=self.nr_bfs, w=eval_weights,
+                                                    y0=self.start, goal=self.end)
             y_track, _, ddy_track = dmp.rollout()
             self.get_logger().info('Active RL Eval: Responded!')
+            self.get_logger().info(f'After{msg.weights}')
             reward, not_perform = self.compute_reward(y_track, ddy_track, self.nr_steps)
             self.get_logger().info(f'{reward}')
@@ -197,7 +218,6 @@ class IML_Experiment(Node):
         # 2. Check if any (x,y) position is within the circle
         t = 0
         for i in range(y_track.shape[0]):
-            self.get_logger().info(f'{y_track[i, :]}')
             distance_to_circle_center = np.linalg.norm(y_track[i, :] - self.end)
             if distance_to_circle_center < self.circle_radius:
                 t = i
@@ -207,6 +227,7 @@ class IML_Experiment(Node):
         y_track = y_track[:t, :]
         ddy_track = ddy_track[:t, :]
+        self.get_logger().info(f'{y_track.shape}')
         penalty = t * self.time_pen
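
For orientation, a minimal sketch of how dmp_callback drives pydmps after these changes (not part of the commit; p_x/p_y below are placeholders for the arrays carried by the DMP message):

import numpy as np
import pydmps.dmp_discrete

p_x = np.zeros(10)                               # placeholder weights for the x dimension
p_y = np.zeros(10)                               # placeholder weights for the y dimension
weights = np.vstack((p_x, p_y)) * 100            # shape (n_dims, n_bfs); weight_scaling is 100 after this commit

dmp = pydmps.dmp_discrete.DMPs_discrete(n_dmps=2, n_bfs=10, w=weights,
                                        y0=np.array([0.5, -0.3]), goal=np.array([0.51, 0.3]))
y_track, dy_track, ddy_track = dmp.rollout()     # positions, velocities and accelerations of the rollout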

View File

@@ -0,0 +1,279 @@
import rclpy
from rclpy.node import Node
from rclpy.callback_groups import ReentrantCallbackGroup

from std_msgs.msg import Empty
from geometry_msgs.msg import PoseArray
from geometry_msgs.msg import Pose

from active_bo_msgs.msg import ActiveRLRequest
from active_bo_msgs.msg import ActiveRLResponse
from active_bo_msgs.msg import ActiveRLEvalRequest
from active_bo_msgs.msg import ActiveRLEvalResponse

from franka_iml_experiment.RepresentationModel.GaussianModelMultiDim import GaussianRBF

import numpy as np


class IML_Experiment(Node):
    def __init__(self):
        super().__init__('iml_node')

        rl_callback_group = ReentrantCallbackGroup()
        topic_callback_group = ReentrantCallbackGroup()

        self.traj_pub = self.create_publisher(PoseArray,
                                              'moveit_interface/task_space_trajectory',
                                              10,
                                              callback_group=rl_callback_group)
        self.exec_pub = self.create_publisher(Empty,
                                              'moveit_interface/execution',
                                              10)
        self.rl_subscription = self.create_subscription(ActiveRLRequest,
                                                        'active_rl_request',
                                                        self.rl_callback,
                                                        10,
                                                        callback_group=rl_callback_group)
        self.active_rl_pub = self.create_publisher(ActiveRLResponse,
                                                   'active_rl_response',
                                                   1,
                                                   callback_group=rl_callback_group)

        # interactive part
        self.eval_pub = self.create_publisher(ActiveRLEvalRequest,
                                              'active_rl_eval_request',
                                              1,
                                              callback_group=topic_callback_group)
        self.eval_sub = self.create_subscription(ActiveRLEvalResponse,
                                                 'active_rl_eval_response',
                                                 self.rl_eval_callback,
                                                 1,
                                                 callback_group=topic_callback_group)

        # States
        self.eval_pending = False

        # Reward constants
        self.distance_pen = -0.01
        self.accel_pen = -0.01
        self.time_pen = -1.
        self.zone_pen = -20.
        self.goal_rew = 10.

        # Scene parameters
        self.square_center = np.array([0.5, 0.0])
        self.square_half_side = 0.1  # half of the side length
        self.circle_radius = 0.02

        # BO parameters
        self.start = {0: [0.5, -0.3]}
        self.end = [0.5, 0.3]
        self.nr_dims = None
        self.nr_weights = None
        self.nr_steps = None
        self.weight_preference = None
        self.weights = None
        self.policy = None

        # Policy parameter
        self.policy_model = None
        self.lower_bound = -1.0
        self.upper_bound = 1.0
        self.seed = None

    def rl_callback(self, msg):
        interactive_run = msg.interactive_run
        self.nr_weights = msg.nr_weights
        self.nr_steps = msg.nr_steps
        self.nr_dims = msg.nr_dims

        weight_dims = (self.nr_weights, self.nr_dims)
        self.weights = np.array(msg.weights, dtype=np.float64).reshape(weight_dims, order='F')

        self.policy_model = GaussianRBF(self.nr_steps, self.nr_weights, self.nr_dims,
                                        lower_bound=self.lower_bound, upper_bound=self.upper_bound, seed=self.seed,
                                        fixed_dims=self.start)
        self.policy_model.set_weights(np.around(self.weights, decimals=8))
        self.policy = self.policy_model.rollout()

        reward, not_perform, terminate_time = self.compute_reward(self.policy, self.nr_steps)
        short_policy = self.policy[:terminate_time, :]

        if interactive_run == 2:
            if not not_perform:
                pose_msg = PoseArray()
                for i in range(short_policy.shape[0]):
                    pose = Pose()
                    pose.position.x = short_policy[i, 0]
                    pose.position.y = short_policy[i, 1]
                    pose.position.z = 0.15
                    pose.orientation.x = 1.
                    pose.orientation.y = 0.
                    pose.orientation.z = 0.
                    pose.orientation.w = 0.
                    pose_msg.poses.append(pose)

                self.traj_pub.publish(pose_msg)
            else:
                self.get_logger().info(f'Trajectory is not valid!')

        if interactive_run == 1:
            if not not_perform:
                pose_msg = PoseArray()
                for i in range(short_policy.shape[0]):
                    pose = Pose()
                    pose.position.x = short_policy[i, 0]
                    pose.position.y = short_policy[i, 1]
                    pose.position.z = 0.15
                    pose.orientation.x = 1.
                    pose.orientation.y = 0.
                    pose.orientation.z = 0.
                    pose.orientation.w = 0.
                    pose_msg.poses.append(pose)

                # self.traj_pub.publish(pose_msg)
                # self.exec_pub.publish(Empty())

            rl_response = ActiveRLResponse()
            rl_response.weights = msg.weights
            rl_response.reward = reward
            rl_response.final_step = self.nr_steps

            if self.weight_preference is None:
                weight_preference = [False] * self.nr_weights * 2
            else:
                weight_preference = self.weight_preference

            rl_response.weight_preference = weight_preference

            self.active_rl_pub.publish(rl_response)

        else:
            self.get_logger().info('Interactive Learning started!')

            eval_msg = ActiveRLEvalRequest()
            eval_msg.weights = self.weights.flatten('F').tolist()
            eval_msg.policy = self.policy_model.rollout().flatten('F').tolist()
            eval_msg.nr_steps = self.nr_steps
            eval_msg.nr_weights = self.nr_weights

            self.eval_pub.publish(eval_msg)
            self.eval_pending = True

            pose_msg = PoseArray()
            for i in range(short_policy.shape[0]):
                pose = Pose()
                pose.position.x = short_policy[i, 0]
                pose.position.y = short_policy[i, 1]
                pose.position.z = 0.15
                pose.orientation.x = 1.
                pose.orientation.y = 0.
                pose.orientation.z = 0.
                pose.orientation.w = 0.
                pose_msg.poses.append(pose)

            self.traj_pub.publish(pose_msg)

    def rl_eval_callback(self, msg):
        if self.eval_pending:
            self.eval_pending = False

            weight_dims = (self.nr_weights, self.nr_dims)
            self.weights = np.array(msg.weights, dtype=np.float64).reshape(weight_dims, order='F')
            self.policy_model.set_weights(np.around(self.weights, decimals=8))
            self.policy = self.policy_model.rollout()

            self.weight_preference = msg.weight_preference

            self.get_logger().info('Active RL Eval: Responded!')

            reward, not_perform, terminate_time = self.compute_reward(self.policy, self.nr_steps)
            self.get_logger().info(f'Current Reward: {reward}')

            short_policy = self.policy[:terminate_time, :]

            if not not_perform:
                pose_msg = PoseArray()
                for i in range(short_policy.shape[0]):
                    pose = Pose()
                    pose.position.x = short_policy[i, 0]
                    pose.position.y = short_policy[i, 1]
                    pose.position.z = 0.15
                    pose.orientation.x = 1.
                    pose.orientation.y = 0.
                    pose.orientation.z = 0.
                    pose.orientation.w = 0.
                    pose_msg.poses.append(pose)

                self.traj_pub.publish(pose_msg)
                # self.exec_pub.publish(Empty())

            rl_response = ActiveRLResponse()
            rl_response.weights = msg.weights
            rl_response.reward = reward
            rl_response.final_step = self.nr_steps

            if self.weight_preference is None:
                weight_preference = [False] * self.nr_weights * 2
            else:
                weight_preference = self.weight_preference

            rl_response.weight_preference = weight_preference

            self.active_rl_pub.publish(rl_response)

    def compute_reward(self, y_track, time):
        # 1. Check if any (x,y) position is within the square
        within_square = np.any(
            (y_track[:, 0] > self.square_center[0] - self.square_half_side) &
            (y_track[:, 0] < self.square_center[0] + self.square_half_side) &
            (y_track[:, 1] > self.square_center[1] - self.square_half_side) &
            (y_track[:, 1] < self.square_center[1] + self.square_half_side)
        )

        # If the robot moves into the forbidden zone it won't perform the trajectory and gets penalized
        if within_square:
            return self.zone_pen + time * self.time_pen, within_square, 0

        # 2. Check if any (x,y) position is within the circle
        t = 0
        for i in range(y_track.shape[0]):
            distance_to_circle_center = np.linalg.norm(y_track[i, :] - self.end)
            if distance_to_circle_center < self.circle_radius:
                t = i
                break

        if t == 0:
            t = y_track.shape[0]

        y_track = y_track[:t, :]

        penalty = t * self.time_pen

        # 3. Compute the Euclidean distance from each (x,y) position to the center of the square
        distances_to_square_center = np.linalg.norm(y_track - self.square_center, axis=1)
        total_distance_norm = np.sum(distances_to_square_center)
        penalty += total_distance_norm * self.distance_pen

        return penalty, within_square, t


def main(args=None):
    rclpy.init(args=args)
    dmp_node = IML_Experiment()

    rclpy.spin(dmp_node)

    dmp_node.destroy_node()
    rclpy.shutdown()


if __name__ == '__main__':
    main()
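
As a sanity check on the reward terms above (illustrative, not part of the commit): with the constants defined in this node, a straight line from the start pose to the goal pose crosses the forbidden square and is rejected outright.

import numpy as np

# 20-step straight line from (0.5, -0.3) to (0.5, 0.3)
y_track = np.linspace([0.5, -0.3], [0.5, 0.3], 20)

# Intermediate points such as (0.5, 0.016) lie inside the square centred at (0.5, 0.0)
# with half side 0.1, so compute_reward(y_track, 20) returns
#   zone_pen + 20 * time_pen = -20.0 + 20 * (-1.0) = -40.0
# with not_perform=True and terminate_time=0, and the node does not publish the trajectory.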

View File

@@ -28,7 +28,7 @@ setup(
             'dmp_node = franka_iml_experiment.dmp_node:main',
             'dmp_test = franka_iml_experiment.dmp_test:main',
             'move_square = franka_iml_experiment.move_square:main',
-            'iml_experiment = franka_iml_experiment.iml_experiment_node:main',
+            'iml_experiment = franka_iml_experiment.iml_experiment_nodmp:main',
         ],
     },
)