Reacher Experiments
Finger Env added
parent 569a4c2217 · commit 413d195581
@@ -114,7 +114,7 @@ class BayesianOptimization:
     def get_best_result(self):
         y_max = np.max(self.Y)
         idx = np.argmax(self.Y)
-        x_max = self.X[idx, :]
+        x_max = self.X[idx, :].reshape((self.nr_weights, self.nr_dims), order='F')

         return y_max, x_max, idx

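Note: the reshape added to get_best_result mirrors the column-major flatten('F') used when weights are packed into the RL request messages further down in this commit. A minimal round-trip sketch (array sizes are illustrative, not taken from the repository):

import numpy as np

nr_weights, nr_dims = 5, 2                                  # illustrative sizes
weights = np.arange(nr_weights * nr_dims, dtype=float).reshape(nr_weights, nr_dims)

flat = weights.flatten('F')                                 # column-major, as in rl_msg.weights
restored = flat.reshape((nr_weights, nr_dims), order='F')   # as in get_best_result

assert np.array_equal(weights, restored)                    # round trip only holds if both sides use order='F'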
@@ -152,12 +152,6 @@ class ActiveBOTopic(Node):
         else:
             self.seed = None

-        # set rl environment
-        if self.bo_env == "Reacher":
-            pass
-        else:
-            raise NotImplementedError
-
     def reset_rl_response(self):
         self.rl_weights = None
         self.rl_final_step = None
@@ -222,8 +216,8 @@ class ActiveBOTopic(Node):
             rl_msg = ActiveRLRequest()
             rl_msg.env = self.bo_env
             rl_msg.seed = seed
-            rl_msg.display_run = False
-            rl_msg.interactive_run = 2
+            rl_msg.display_run = True
+            rl_msg.interactive_run = 3
             rl_msg.weights = self.BO.policy_model.random_weights().flatten('F').tolist()
             rl_msg.policy = self.BO.policy_model.rollout().flatten('F').tolist()
             rl_msg.nr_weights = self.bo_nr_weights
@@ -243,12 +237,14 @@ class ActiveBOTopic(Node):
             bo_response.nr_weights = self.bo_nr_weights
             bo_response.nr_steps = self.bo_steps

-            bo_response.reward_mean = np.mean(self.reward, axis=1).tolist()
-            bo_response.reward_std = np.std(self.reward, axis=1).tolist()
+            bo_response.reward_mean = np.mean(self.reward, axis=0).tolist()
+            bo_response.reward_std = np.std(self.reward, axis=0).tolist()

             if self.save_result:
                 if self.bo_env == "Reacher":
                     env = 're'
+                elif self.bo_env == "Finger":
+                    env = 'fin'
                 else:
                     raise NotImplementedError

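Note: the axis change above only alters which dimension of self.reward is averaged. For reference, a small numpy example of the axis semantics (the actual layout of self.reward is not shown in this diff, so the row/column meaning below is only a stand-in):

import numpy as np

reward = np.array([[1.0, 2.0, 3.0],
                   [4.0, 5.0, 6.0]])     # stand-in for self.reward, shape (2, 3)

print(np.mean(reward, axis=0).tolist())  # [2.5, 3.5, 4.5] -> one mean per column
print(np.mean(reward, axis=1).tolist())  # [2.0, 5.0]      -> one mean per row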
@@ -278,7 +274,7 @@ class ActiveBOTopic(Node):
             active_rl_request = ActiveRLRequest()

             if self.bo_fixed_seed:
-                seed = int(self.seed_array[0, best_policy_idx])
+                seed = int(self.seed_array[best_policy_idx, 0])
             else:
                 seed = int(np.random.randint(1, 2147483647, 1)[0])

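Note: the flipped indexing matches the write self.seed_array[self.current_run, 0] = self.seed that appears further down, which implies a (runs, 1) layout. A short sketch under that assumption:

import numpy as np

bo_runs = 4
seed_array = np.zeros((bo_runs, 1))            # assumed layout: one seed per run

for run in range(bo_runs):
    seed_array[run, 0] = 1000 + run            # mirrors seed_array[self.current_run, 0] = self.seed

best_policy_idx = 2
seed = int(seed_array[best_policy_idx, 0])     # row-first lookup matches the row-first write
assert seed == 1002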
@@ -386,6 +382,9 @@ class ActiveBOTopic(Node):
             self.best_pol_reward[self.current_run, :], \
                 self.best_weights[self.current_run, :, :], idx = self.BO.get_best_result()

+            self.BO.policy_model.weights = self.best_weights[self.current_run, :, :]
+            self.best_policy[self.current_run, :, :] = self.BO.policy_model.rollout()
+
             if self.current_run < self.bo_runs - 1:
                 self.BO = None

@@ -393,7 +392,6 @@ class ActiveBOTopic(Node):
                 self.last_query = 0
                 if self.bo_fixed_seed:
                     self.seed_array[self.current_run, 0] = self.seed
-                else:
-                    self.seed = int(np.random.randint(1, 2147483647, 1)[0])
+                self.seed = int(np.random.randint(1, 2147483647, 1)[0])
                 # self.get_logger().info(f'{self.seed}')
                 self.current_run += 1

@@ -124,7 +124,6 @@ class ActiveRL(Node):

     def step(self, policy, display_run):
         done = False
-
         action = policy[self.rl_step, :]
         action_clipped = action.clip(min=-1.0, max=1.0)
         output = self.env.step(action_clipped.astype(np.float64))
@@ -135,7 +134,7 @@ class ActiveRL(Node):
             self.rl_reward += output.reward * 10
             done = True
         else:
-            self.rl_reward -= 1.0
+            self.rl_reward -= 1.0 + np.linalg.norm(action_clipped) * 0.1

         if display_run:
             rgb_array = self.env.physics.render(camera_id=0, height=320, width=480)
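Note: the new per-step penalty adds an action-magnitude term on top of the flat time cost. A sketch of the shaping term in isolation (the 0.1 scale and the clipping range are taken from the lines above; everything else is illustrative):

import numpy as np

action = np.array([0.8, -1.4])                     # raw policy output for one step
action_clipped = action.clip(min=-1.0, max=1.0)    # [0.8, -1.0]

penalty = 1.0 + np.linalg.norm(action_clipped) * 0.1
print(penalty)                                     # 1.0 + 0.1 * sqrt(0.64 + 1.0) ≈ 1.128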
@@ -173,10 +172,10 @@ class ActiveRL(Node):
             step_count += 1

             if output.reward != 0.0:
-                self.rl_reward += output.reward * 10
+                env_reward += output.reward * 10
                 done = True
             else:
-                self.rl_reward -= 1.0
+                env_reward -= 1.0 + np.linalg.norm(action_clipped) * 0.1

             if done:
                 break
@@ -184,7 +183,7 @@ class ActiveRL(Node):

     def mainloop_callback(self):
         if self.rl_pending:
-            if self.interactive_run == 0:
+            if self.interactive_run == 0:  # Interactive Mode
                if not self.best_pol_shown:
                    if not self.policy_sent:
                        self.rl_step = 0
@@ -195,11 +194,18 @@ class ActiveRL(Node):
                        self.env = suite.load('reacher',
                                              'hard',
                                              task_kwargs={'random': random_state})
-                        self.rl_spec = self.env.action_spec()
-                        self.env.reset()
+                    elif self.rl_env == "Finger":
+                        np.random.seed(self.rl_seed)
+                        random_state = np.random.RandomState(seed=self.rl_seed)
+                        self.env = suite.load('finger',
+                                              'turn_easy',
+                                              task_kwargs={'random': random_state})
                    else:
                        raise NotImplementedError

+                    self.rl_spec = self.env.action_spec()
+                    self.env.reset()
+
                    eval_request = ActiveRLEvalRequest()
                    eval_request.policy = self.rl_policy.flatten('F').tolist()
                    eval_request.weights = self.rl_weights
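Note: the 'finger' / 'turn_easy' pair added here is a standard dm_control suite task. A minimal standalone sketch of loading and stepping it with random actions (seed value and episode length are arbitrary):

import numpy as np
from dm_control import suite

random_state = np.random.RandomState(seed=0)
env = suite.load('finger', 'turn_easy', task_kwargs={'random': random_state})

spec = env.action_spec()                 # same call as self.rl_spec above
time_step = env.reset()

for _ in range(50):
    action = np.random.uniform(spec.minimum, spec.maximum, size=spec.shape)
    time_step = env.step(action)
    if time_step.reward:                 # non-zero reward once the target orientation is reached
        break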
@@ -224,11 +230,18 @@ class ActiveRL(Node):
                    self.env = suite.load('reacher',
                                          'hard',
                                          task_kwargs={'random': random_state})
-                    self.rl_spec = self.env.action_spec()
-                    self.env.reset()
+                elif self.rl_env == "Finger":
+                    np.random.seed(self.rl_seed)
+                    random_state = np.random.RandomState(seed=self.rl_seed)
+                    self.env = suite.load('finger',
+                                          'turn_easy',
+                                          task_kwargs={'random': random_state})
                else:
                    raise NotImplementedError

+                self.rl_spec = self.env.action_spec()
+                self.env.reset()
+
            elif self.best_pol_shown:
                if not self.eval_response_received:
                    pass
@@ -256,7 +269,7 @@ class ActiveRL(Node):
                    self.eval_response_received = False
                    self.rl_pending = False

-            elif self.interactive_run == 1:
+            elif self.interactive_run == 1:  # Last Run
                if not self.policy_sent:
                    self.rl_step = 0
                    self.rl_reward = 0.0
@@ -265,18 +278,26 @@ class ActiveRL(Node):
                        np.random.seed(self.rl_seed)
                        random_state = np.random.RandomState(seed=self.rl_seed)
                        self.env = suite.load('reacher', 'hard', task_kwargs={'random': random_state})
-                        self.rl_spec = self.env.action_spec()
-                        self.env.reset()
+                    elif self.rl_env == "Finger":
+                        np.random.seed(self.rl_seed)
+                        random_state = np.random.RandomState(seed=self.rl_seed)
+                        self.env = suite.load('finger',
+                                              'turn_easy',
+                                              task_kwargs={'random': random_state})
                    else:
                        raise NotImplementedError

+                    self.rl_spec = self.env.action_spec()
+                    self.env.reset()
+
                    eval_request = ActiveRLEvalRequest()
                    eval_request.policy = self.rl_policy.flatten('F').tolist()
                    eval_request.weights = self.rl_weights
+                    eval_request.nr_steps = self.nr_steps
+                    eval_request.nr_weights = self.nr_weights

                    self.eval_pub.publish(eval_request)
-                    self.get_logger().info('Active RL: Called!')
-                    self.get_logger().info('Active RL: Waiting for Eval!')
+                    self.get_logger().info('Active RL: Display Best Run!')

                    self.policy_sent = True

@@ -287,16 +308,23 @@ class ActiveRL(Node):
                    self.rl_reward = 0.0
                    self.rl_pending = False

-            elif self.interactive_run == 2:
+            elif self.interactive_run == 2:  # Without display
                if self.rl_env == "Reacher":
                    np.random.seed(self.rl_seed)
                    random_state = np.random.RandomState(seed=self.rl_seed)
                    self.env = suite.load('reacher', 'hard', task_kwargs={'random': random_state})
-                    self.rl_spec = self.env.action_spec()
-                    self.env.reset()
+                elif self.rl_env == "Finger":
+                    np.random.seed(self.rl_seed)
+                    random_state = np.random.RandomState(seed=self.rl_seed)
+                    self.env = suite.load('finger',
+                                          'turn_easy',
+                                          task_kwargs={'random': random_state})
                else:
                    raise NotImplementedError

+                self.rl_spec = self.env.action_spec()
+                self.env.reset()
+
                env_reward, step_count = self.runner(self.rl_policy)

                rl_response = ActiveRLResponse()
@@ -315,6 +343,66 @@ class ActiveRL(Node):
                self.reset_rl_request()
                self.rl_pending = False

+            elif self.interactive_run == 3:  # Init Run to display the first runs, checking pose of the arm and target
+                if not self.policy_sent:
+                    self.rl_step = 0
+                    self.rl_reward = 0.0
+
+                    if self.rl_env == "Reacher":
+                        np.random.seed(self.rl_seed)
+                        random_state = np.random.RandomState(seed=self.rl_seed)
+                        self.env = suite.load('reacher', 'hard', task_kwargs={'random': random_state})
+                    elif self.rl_env == "Finger":
+                        np.random.seed(self.rl_seed)
+                        random_state = np.random.RandomState(seed=self.rl_seed)
+                        self.env = suite.load('finger',
+                                              'turn_easy',
+                                              task_kwargs={'random': random_state})
+                    else:
+                        raise NotImplementedError
+
+                    self.rl_spec = self.env.action_spec()
+                    self.env.reset()
+
+                    eval_request = ActiveRLEvalRequest()
+                    eval_request.policy = self.rl_policy.flatten('F').tolist()
+                    eval_request.weights = self.rl_weights
+                    eval_request.nr_steps = self.nr_steps
+                    eval_request.nr_weights = self.nr_weights
+
+                    self.eval_pub.publish(eval_request)
+                    self.get_logger().info('Active RL: Init Display!')
+
+                    self.policy_sent = True
+
+                done = self.step(self.rl_policy, self.display_run)
+
+                if done:
+                    rl_response = ActiveRLResponse()
+                    rl_response.weights = self.rl_weights
+                    rl_response.reward = self.rl_reward
+                    rl_response.final_step = self.rl_step
+                    if self.weight_preference is None:
+                        weight_preference = [False] * len(self.rl_weights)
+                    else:
+                        weight_preference = self.weight_preference
+
+                    rl_response.weight_preference = weight_preference
+
+                    self.active_rl_pub.publish(rl_response)
+
+                    # reset flags and attributes
+                    self.reset_eval_request()
+                    self.reset_rl_request()
+
+                    self.rl_step = 0
+                    self.rl_reward = 0.0
+
+                    self.best_pol_shown = False
+                    self.eval_response_received = False
+                    self.rl_pending = False
+

 def main(args=None):
     rclpy.init(args=args)
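Note: taken together, the inline comments added in this commit document the interactive_run codes sent by ActiveBOTopic and dispatched in mainloop_callback. A small summary of that mapping (the dict itself is illustrative, not part of the node):

# interactive_run codes as documented by the comments added in this commit
INTERACTIVE_RUN_MODES = {
    0: "Interactive Mode",    # query the user via ActiveRLEval
    1: "Last Run",            # display the best policy found
    2: "Without display",     # plain rollout via runner()
    3: "Init Run",            # display the first runs to check arm and target pose
}

print(INTERACTIVE_RUN_MODES[3])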