random metric works, first results

Niko Feith 2023-06-01 15:45:55 +02:00
parent 55b21d667a
commit 334f64e22d
4 changed files with 100 additions and 39 deletions

View File

@@ -1,4 +1,5 @@
 string env
 uint32 seed
+bool final_run
 float32[] policy
 float32[] weights
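The added bool final_run field lets the sender mark the concluding rollout of the best policy. Below is a minimal sketch of a request using the extended definition; a plain-Python dataclass stands in for the generated ROS message class, and all values are illustrative, only the field names come from the definition above.

from dataclasses import dataclass, field
from typing import List

@dataclass
class ActiveRLStandIn:
    """Plain-Python stand-in mirroring the updated ActiveRL message fields."""
    env: str = ""
    seed: int = 0
    final_run: bool = False          # new field introduced by this commit
    policy: List[float] = field(default_factory=list)
    weights: List[float] = field(default_factory=list)

# A final-run request as the BO node would now build it (example values):
request = ActiveRLStandIn(
    env="Mountain Car",
    seed=42,
    final_run=True,                  # replay the best policy without asking for user feedback
    policy=[0.0, 0.1, 0.2],
    weights=[0.5, -0.5],
)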

View File

@@ -21,7 +21,6 @@ class BayesianOptimization:
         self.episode = 0
         self.counter_array = np.empty((1, 1))
         self.best_reward = np.empty((1, 1))
-        self.distance_penalty = 0

         self.nr_policy_weights = nr_weights
         self.nr_steps = nr_steps
@@ -63,8 +62,6 @@ class BayesianOptimization:
                 break
             if not done and i == len(policy):
-                distance = -(self.env.goal_position - output[0][0])
-                env_reward += distance * self.distance_penalty

         self.counter_array = np.vstack((self.counter_array, step_count))
         self.env.reset(seed=seed)
@@ -157,11 +154,11 @@ class BayesianOptimization:
         self.episode += 1

     def get_best_result(self):
-        y_hat = self.GP.predict(self.X)
-        idx = np.argmax(y_hat)
+        y_max = np.max(self.Y)
+        idx = np.argmax(self.Y)
         x_max = self.X[idx, :]
         self.policy_model.weights = x_max
         best_policy = self.policy_model.rollout().reshape(-1,)

-        return best_policy, y_hat[idx], x_max
+        return best_policy, y_max, x_max
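get_best_result now reports the best sample that was actually observed (the maximum of self.Y) instead of the GP's predicted value at the argmax of its posterior mean, so the returned reward corresponds to a rollout that was really executed. A standalone sketch of the new selection step, with X as the evaluated weight vectors and Y the recorded episode rewards (both arrays here are made-up examples):

import numpy as np

def best_observed(X, Y):
    # Pick the weight vector with the highest observed reward.
    Y = np.asarray(Y).reshape(-1)
    idx = int(np.argmax(Y))      # index of the best observed sample
    return X[idx, :], Y[idx]     # x_max, y_max

# Example with three evaluated weight vectors:
X = np.array([[0.1, 0.2],
              [0.4, -0.3],
              [0.0, 0.5]])
Y = np.array([1.0, 3.5, 2.0])
x_max, y_max = best_observed(X, Y)   # x_max = [0.4, -0.3], y_max = 3.5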

View File

@@ -189,17 +189,52 @@ class ActiveBOTopic(Node):
             bo_response.reward_std = np.std(self.reward, axis=1).tolist()

             if self.save_result:
+                if self.bo_env == "Mountain Car":
+                    env = 'mc'
+                elif self.bo_env == "Cartpole":
+                    env = 'cp'
+                elif self.bo_env == "Acrobot":
+                    env = 'ab'
+                elif self.bo_env == "Pendulum":
+                    env = 'pd'
+                else:
+                    raise NotImplementedError
+
+                if self.bo_acq_fcn == "Expected Improvement":
+                    acq = 'ei'
+                elif self.bo_acq_fcn == "Probability of Improvement":
+                    acq = 'pi'
+                elif self.bo_acq_fcn == "Upper Confidence Bound":
+                    acq = 'cb'
+                else:
+                    raise NotImplementedError
+
                 home_dir = os.path.expanduser('~')
                 file_path = os.path.join(home_dir, 'Documents/IntRLResults')
-                filename = self.bo_metric + '-'\
+                filename = env + '-' + acq + '-' + self.bo_metric + '-' \
                            + str(self.bo_metric_parameter) + '-' \
                            + str(self.bo_nr_weights) + '-' + str(time.time())
                 filename = filename.replace('.', '_') + '.csv'
                 path = os.path.join(file_path, filename)
+                self.get_logger().info(path)
                 np.savetxt(path, self.reward, delimiter=',')
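Result files are now prefixed with short codes for the environment and the acquisition function, followed by the metric, its parameter, the number of weights, and a timestamp; every '.' except the extension's is turned into '_'. A small sketch of the naming scheme with made-up parameter values:

import os
import time

env = 'mc'                # "Mountain Car"
acq = 'ei'                # "Expected Improvement"
metric = 'random'         # example metric name
metric_parameter = 0.5    # example value
nr_weights = 6            # example value

home_dir = os.path.expanduser('~')
file_path = os.path.join(home_dir, 'Documents/IntRLResults')

filename = env + '-' + acq + '-' + metric + '-' \
           + str(metric_parameter) + '-' \
           + str(nr_weights) + '-' + str(time.time())
filename = filename.replace('.', '_') + '.csv'
# e.g. 'mc-ei-random-0_5-6-1685627155_123456.csv'

path = os.path.join(file_path, filename)
print(path)   # the node now logs this path before np.savetxt writes the reward matrix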
+            active_rl_request = ActiveRL()
+
+            if self.seed is None:
+                seed = int(np.random.randint(1, 2147483647, 1)[0])
+            else:
+                seed = self.seed
+
+            active_rl_request.env = self.bo_env
+            active_rl_request.seed = seed
+            active_rl_request.policy = self.best_policy[:, best_policy_idx].tolist()
+            active_rl_request.weights = self.best_weights[:, best_policy_idx].tolist()
+            active_rl_request.final_run = True
+
+            self.get_logger().info('Calling: Active RL')
+            self.active_rl_pub.publish(active_rl_request)
+
             self.get_logger().info('Responding: Active BO')
             self.active_bo_pub.publish(bo_response)
             self.reset_bo_request()
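Before publishing this final-run request, the node falls back to a random seed when none was configured. The upper bound 2147483647 is 2**31 - 1, which keeps the drawn value inside the message's uint32 seed field. A small sketch of that fallback; the helper name is illustrative, not part of the node:

import numpy as np

def pick_seed(configured_seed=None):
    # Use the configured seed if one is set, otherwise draw a fresh one
    # from [1, 2**31 - 1), matching the uint32 seed field of ActiveRL.
    if configured_seed is None:
        return int(np.random.randint(1, 2147483647, 1)[0])
    return configured_seed

# Example: no configured seed -> random draw; a fixed seed passes through unchanged.
print(pick_seed())      # e.g. 1534207846
print(pick_seed(42))    # 42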
@@ -245,6 +280,7 @@ class ActiveBOTopic(Node):
             active_rl_request.seed = seed
             active_rl_request.policy = old_policy.tolist()
             active_rl_request.weights = old_weights.tolist()
+            active_rl_request.final_run = False

             self.get_logger().info('Calling: Active RL')
             self.active_rl_pub.publish(active_rl_request)
@@ -276,12 +312,13 @@ class ActiveBOTopic(Node):
         state_msg = ActiveBOState()
         state_msg.current_run = self.current_run + 1 if self.current_run < self.bo_runs else self.bo_runs
-        state_msg.current_episode = self.current_episode + 1\
+        state_msg.current_episode = self.current_episode + 1 \
             if self.current_episode < self.bo_episodes else self.bo_episodes
         state_msg.best_reward = self.best_reward
         state_msg.last_user_reward = self.rl_reward
         self.state_pub.publish(state_msg)


 def main(args=None):
     rclpy.init(args=args)

View File

@@ -69,6 +69,7 @@ class ActiveRLService(Node):
         self.best_pol_shown = False
         self.policy_sent = False
         self.active_rl_pending = False
+        self.final_run = False

         # Main loop timer object
         self.mainloop_timer_period = 0.05
@@ -87,6 +88,7 @@ class ActiveRLService(Node):
         self.rl_seed = msg.seed
         self.rl_policy = np.array(msg.policy, dtype=np.float32)
         self.rl_weights = msg.weights
+        self.final_run = msg.final_run

         if self.rl_env == "Mountain Car":
             self.env = Continuous_MountainCarEnv(render_mode="rgb_array")
@@ -151,6 +153,7 @@ class ActiveRLService(Node):
     def mainloop_callback(self):
         if self.active_rl_pending:
+            if not self.final_run:
                 if not self.best_pol_shown:
                     if not self.policy_sent:
                         self.rl_step = 0
@@ -201,6 +204,29 @@ class ActiveRLService(Node):
                     self.best_pol_shown = False
                     self.eval_response_received = False
                     self.active_rl_pending = False
+            else:
+                if not self.policy_sent:
+                    self.rl_step = 0
+                    self.rl_reward = 0.0
+                    self.env.reset(seed=self.rl_seed)
+
+                    eval_request = ActiveRL()
+                    eval_request.policy = self.rl_policy.tolist()
+                    eval_request.weights = self.rl_weights
+
+                    self.eval_pub.publish(eval_request)
+                    self.get_logger().info('Active RL: Called!')
+                    self.get_logger().info('Active RL: Waiting for Eval!')
+
+                    self.policy_sent = True
+
+                done = self.next_image(self.rl_policy)
+
+                if done:
+                    self.rl_step = 0
+                    self.rl_reward = 0.0
+                    self.final_run = False
+                    self.active_rl_pending = False


 def main(args=None):
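On the receiving side, mainloop_callback now splits into two branches: the existing interactive path (candidate policy shown, user feedback collected) runs when final_run is False, while the new else branch replays the best policy exactly once, publishes it for display, and clears its flags when the rollout finishes. Below is a condensed, self-contained sketch of that final-run control flow; FinalRunLoop, publish_for_display, and step_policy are illustrative placeholders for the node, its eval_pub.publish(...) call, and next_image(...), not part of the code above.

class FinalRunLoop:
    # Minimal stand-in for the final-run branch of the mainloop callback.

    def __init__(self, policy, seed):
        self.policy = policy
        self.seed = seed
        self.policy_sent = False
        self.final_run = True
        self.active_rl_pending = True
        self.step = 0
        self.reward = 0.0

    def publish_for_display(self):
        # Placeholder for self.eval_pub.publish(eval_request) in the real node.
        pass

    def step_policy(self):
        # Placeholder for self.next_image(self.rl_policy); returns True once the episode ends.
        self.step += 1
        return self.step >= len(self.policy)

    def tick(self):
        # One timer tick (the node's timer fires every 0.05 s).
        if not (self.active_rl_pending and self.final_run):
            return
        if not self.policy_sent:
            self.step = 0
            self.reward = 0.0
            self.publish_for_display()   # send the best policy out once
            self.policy_sent = True
        if self.step_policy():           # advance the rollout by one frame
            self.final_run = False       # done: reset the flags so normal requests resume
            self.active_rl_pending = False

# Usage: drive the loop until the final rollout has been shown.
loop = FinalRunLoop(policy=[0.0, 0.1, 0.2], seed=42)
while loop.active_rl_pending:
    loop.tick()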