Improved simulation time

Niko Feith 2023-06-12 13:57:36 +02:00
parent e0451ab4e3
commit b902e07424
2 changed files with 108 additions and 36 deletions

File 1 of 2: ActiveRLService

@@ -78,6 +78,10 @@ class ActiveRLService(Node):
                                                self.mainloop_callback,
                                                callback_group=mainloop_callback_group)
 
+        # time measurements
+        self.begin_time = None
+        self.end_time = None
+
     def reset_rl_request(self):
         self.rl_env = None
         self.rl_seed = None
@@ -86,6 +90,7 @@ class ActiveRLService(Node):
         self.interactive_run = 0
 
     def active_rl_callback(self, msg):
+        self.begin_time = time.time()
         self.rl_env = msg.env
         self.rl_seed = msg.seed
         self.display_run = msg.display_run
@@ -155,6 +160,27 @@ class ActiveRLService(Node):
         return done
 
+    def complete_run(self, policy):
+        env_reward = 0.0
+        step_count = 0
+
+        self.env.reset(seed=self.rl_seed)
+
+        for i in range(len(policy)):
+            action = policy[i]
+            action_clipped = action.clip(min=-1.0, max=1.0)
+            output = self.env.step(action_clipped.astype(np.float64))
+
+            env_reward += output[1]
+            done = output[2]
+            step_count += 1
+
+            if done:
+                break
+
+        self.env.reset(seed=self.rl_seed)
+
+        return env_reward, step_count
+
     def mainloop_callback(self):
         if self.rl_pending:
             if self.interactive_run == 0:
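The new complete_run is the core of the speed-up: it plays the whole action sequence against the environment in one tight loop instead of advancing one step per timer tick. Below is a standalone sketch of the same rollout logic, assuming a classic Gym-style step() that returns (obs, reward, done, info), which the output[1]/output[2] indexing in the diff suggests:

```python
import numpy as np

def complete_run(env, policy, seed=None):
    """Roll out a fixed action sequence in one shot, without rendering.

    A minimal sketch mirroring ActiveRLService.complete_run; env is assumed
    to follow the classic Gym API: step() -> (obs, reward, done, info).
    """
    env_reward = 0.0
    step_count = 0
    env.reset(seed=seed)

    for action in policy:
        # Keep actions inside the expected [-1, 1] range before stepping.
        clipped = np.clip(action, -1.0, 1.0).astype(np.float64)
        _, reward, done, _ = env.step(clipped)
        env_reward += reward
        step_count += 1
        if done:
            break

    env.reset(seed=seed)  # leave the env in a reproducible state for the next run
    return env_reward, step_count
```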
@@ -195,6 +221,10 @@ class ActiveRLService(Node):
                     rl_response.final_step = self.rl_step
 
                     self.active_rl_pub.publish(rl_response)
+                    self.end_time = time.time()
+                    self.get_logger().info(f'RL Time: {self.end_time - self.begin_time}, mode: {self.interactive_run}')
+                    self.begin_time = None
+                    self.end_time = None
 
                     self.env.reset(seed=self.rl_seed)
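The measurement itself is plain wall-clock bracketing: begin_time is stamped when the request arrives in active_rl_callback, end_time right after the response is published, and both are cleared so a stale stamp cannot leak into the next run. A hypothetical helper capturing the same pattern is sketched below; note that time.perf_counter() is generally preferred over time.time() for intervals, since it is monotonic:

```python
import time

class RunTimer:
    """Hypothetical helper mirroring the begin/end bookkeeping in this commit."""

    def __init__(self):
        self.begin_time = None
        self.end_time = None

    def start(self):
        self.begin_time = time.perf_counter()

    def stop(self, log, label='RL'):
        self.end_time = time.perf_counter()
        log(f'{label} Time: {self.end_time - self.begin_time}')
        # Clear both stamps so the next run cannot reuse stale values.
        self.begin_time = None
        self.end_time = None
```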
@@ -232,32 +262,57 @@ class ActiveRLService(Node):
                     self.rl_reward = 0.0
 
                     self.rl_pending = False
+                    self.end_time = time.time()
+                    self.get_logger().info(f'RL Time: {self.end_time - self.begin_time}, mode: {self.interactive_run}')
+                    self.begin_time = None
+                    self.end_time = None
 
             elif self.interactive_run == 2:
-                if not self.policy_sent:
-                    self.rl_step = 0
-                    self.rl_reward = 0.0
-                    self.env.reset(seed=self.rl_seed)
-                    self.policy_sent = True
-
-                done = self.next_image(self.rl_policy, self.display_run)
-
-                if done:
-                    rl_response = ActiveRLResponse()
-                    rl_response.weights = self.rl_weights
-                    rl_response.reward = self.rl_reward
-                    rl_response.final_step = self.rl_step
-
-                    self.active_rl_pub.publish(rl_response)
-
-                    # reset flags and attributes
-                    self.reset_eval_request()
-                    self.reset_rl_request()
-
-                    self.rl_step = 0
-                    self.rl_reward = 0.0
-
-                    self.rl_pending = False
+                env_reward, step_count = self.complete_run(self.rl_policy)
+
+                rl_response = ActiveRLResponse()
+                rl_response.weights = self.rl_weights
+                rl_response.reward = env_reward
+                rl_response.final_step = step_count
+
+                self.active_rl_pub.publish(rl_response)
+                self.end_time = time.time()
+                self.get_logger().info(f'RL Time: {self.end_time - self.begin_time}, mode: {self.interactive_run}')
+                self.begin_time = None
+                self.end_time = None
+
+                self.reset_rl_request()
+
+                self.rl_pending = False
+                # if not self.policy_sent:
+                #     self.rl_step = 0
+                #     self.rl_reward = 0.0
+                #     self.env.reset(seed=self.rl_seed)
+                #     self.policy_sent = True
+                # done = self.next_image(self.rl_policy, self.display_run)
+                #
+                # if done:
+                #     rl_response = ActiveRLResponse()
+                #     rl_response.weights = self.rl_weights
+                #     rl_response.reward = self.rl_reward
+                #     rl_response.final_step = self.rl_step
+                #
+                #     self.active_rl_pub.publish(rl_response)
+                #     self.end_time = time.time()
+                #     self.get_logger().info(f'RL Time: {self.end_time - self.begin_time}, mode: {self.interactive_run}')
+                #     self.begin_time = None
+                #     self.end_time = None
+                #
+                #     # reset flags and attributes
+                #     self.reset_eval_request()
+                #     self.reset_rl_request()
+                #
+                #     self.rl_step = 0
+                #     self.rl_reward = 0.0
+                #
+                #     self.rl_pending = False
 
 def main(args=None):
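Why this change speeds things up: the replaced mode-2 path (next_image) advanced at most one environment step per mainloop invocation, so an episode's wall-clock time was bounded below by steps times the timer period, while complete_run finishes the whole episode inside a single callback. A back-of-the-envelope comparison, with every number assumed for illustration (the actual timer period and episode length are not visible in this diff):

```python
# Hypothetical numbers for illustration only.
timer_period_s = 0.02     # assumed 50 Hz mainloop timer
episode_steps = 500       # assumed episode length
env_step_cost_s = 0.0002  # assumed compute cost of one env.step()

# One step per timer tick: each step waits for the next tick.
per_tick = episode_steps * max(timer_period_s, env_step_cost_s)
# complete_run(): the whole episode runs back-to-back in one callback.
one_shot = episode_steps * env_step_cost_s

print(f'per-tick rollout: >= {per_tick:.1f} s, one-shot rollout: ~{one_shot:.2f} s')
# per-tick rollout: >= 10.0 s, one-shot rollout: ~0.10 s
```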

File 2 of 2: ActiveBOTopic

@@ -98,6 +98,15 @@ class ActiveBOTopic(Node):
                                                self.mainloop_callback,
                                                callback_group=mainloop_callback_group)
 
+        # time measurements
+        self.init_begin = None
+        self.init_end = None
+
+        self.rl_begin = None
+        self.rl_end = None
+
+        self.user_query_begin = None
+        self.user_query_end = None
+
     def reset_bo_request(self):
         self.bo_env = None
         self.bo_metric = None
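Three separate begin/end pairs live on the node because each phase starts in one callback and is closed in another, so the stamps must survive across calls. When a phase begins and ends in the same scope, a reusable context manager is a tidier option; the following is a hypothetical sketch, not part of this commit:

```python
import time
from contextlib import contextmanager

@contextmanager
def timed_phase(name, log):
    """Log the wall-clock duration of a code block as '<name> Time: ...'."""
    begin = time.perf_counter()
    try:
        yield
    finally:
        log(f'{name} Time: {time.perf_counter() - begin}')

# Usage, assuming a ROS 2 node's logger:
#   with timed_phase('Init', self.get_logger().info):
#       run_initialization_step()
```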
@@ -144,6 +153,18 @@ class ActiveBOTopic(Node):
         else:
             self.seed = None
 
+        # set rl environment
+        if self.bo_env == "Mountain Car":
+            self.env = Continuous_MountainCarEnv()
+        elif self.bo_env == "Cartpole":
+            self.env = CartPoleEnv()
+        elif self.bo_env == "Acrobot":
+            self.env = AcrobotEnv()
+        elif self.bo_env == "Pendulum":
+            self.env = PendulumEnv()
+        else:
+            raise NotImplementedError
+
     def reset_rl_response(self):
         self.rl_weights = None
         self.rl_final_step = None
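Constructing the environment here, when the BO request arrives, moves that work out of the timer callback that previously created it lazily (see the removal further down). The if/elif chain could equally be table-driven; a hypothetical sketch, assuming the same four environment classes this file already imports are in scope:

```python
# Mapping from request string to environment class; the classes are the
# ones this node already imports (assumed in scope for this sketch).
ENV_REGISTRY = {
    "Mountain Car": Continuous_MountainCarEnv,
    "Cartpole": CartPoleEnv,
    "Acrobot": AcrobotEnv,
    "Pendulum": PendulumEnv,
}

def make_env(name):
    try:
        return ENV_REGISTRY[name]()  # instantiate on demand
    except KeyError:
        raise NotImplementedError(f'unknown environment: {name}')
```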
@@ -163,6 +184,11 @@ class ActiveBOTopic(Node):
         if self.init_pending:
             self.init_step += 1
+            self.init_end = time.time()
+            self.get_logger().info(f'Init Time: {self.init_end - self.init_begin}')
+            self.init_begin = None
+            self.init_end = None
+
             if self.init_step == self.nr_init:
                 self.init_step = 0
                 self.init_pending = False
@@ -175,20 +201,12 @@ class ActiveBOTopic(Node):
         self.reset_rl_response()
 
     def mainloop_callback(self):
-        if self.active_bo_pending:
-
-            # set rl environment
-            if self.env is None:
-                if self.bo_env == "Mountain Car":
-                    self.env = Continuous_MountainCarEnv()
-                elif self.bo_env == "Cartpole":
-                    self.env = CartPoleEnv()
-                elif self.bo_env == "Acrobot":
-                    self.env = AcrobotEnv()
-                elif self.bo_env == "Pendulum":
-                    self.env = PendulumEnv()
-                else:
-                    raise NotImplementedError
+        if not self.active_bo_pending:
+            return
+        else:
+            if self.rl_pending:
+                return
 
             if self.BO is None:
                 self.BO = BayesianOptimization(self.env,
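The mainloop refactor replaces one big `if self.active_bo_pending:` wrapper with early returns: bail out when nothing is requested, and wait, instead of re-entering, while an RL rollout is in flight. Keeping the `else:` looks redundant after a `return`, but it preserves the indentation of the rest of the method, presumably to keep the diff small. The resulting control flow, reduced to a self-contained skeleton (sketch only):

```python
class MainloopSkeleton:
    """Control-flow skeleton of the refactored callback; attributes are stand-ins."""

    def __init__(self):
        self.active_bo_pending = False
        self.rl_pending = False

    def mainloop_callback(self):
        if not self.active_bo_pending:
            return            # no BO request active: timer tick stays cheap
        else:
            if self.rl_pending:
                return        # RL rollout in flight: wait for its response

            # ...BO initialization queries and episodes would proceed here...
```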
@@ -204,8 +222,8 @@ class ActiveBOTopic(Node):
             self.get_logger().info('BO Initialization is starting!')
             # self.get_logger().info(f'{self.rl_pending}')
-            if self.init_pending and not self.rl_pending:
+            if self.init_pending:
+                self.init_begin = time.time()
 
                 if self.bo_fixed_seed:
                     seed = self.seed
                 else:
@@ -222,7 +240,6 @@ class ActiveBOTopic(Node):
                 self.active_rl_pub.publish(rl_msg)
                 self.rl_pending = True
-                return
 
             if self.current_run == self.bo_runs:
                 bo_response = ActiveBOResponse()
@@ -293,7 +310,7 @@ class ActiveBOTopic(Node):
                 self.reset_bo_request()
             else:
-                if self.rl_pending or self.init_pending:
+                if self.init_pending:
                     return
                 else:
                     if self.current_episode < self.bo_episodes: