debugging regular

Niko Feith 2023-06-06 18:43:49 +02:00
parent 3a8acb6807
commit 355165e804
2 changed files with 43 additions and 24 deletions

View File

@@ -71,8 +71,8 @@ class BayesianOptimization:
         self.env.reset(seed=seed)
         self.reset_bo()
-        self.X = np.zeros((self.nr_init, self.nr_policy_weights))
-        self.Y = np.zeros((self.nr_init, 1))
+        self.X = np.zeros((self.nr_init, self.nr_policy_weights), dtype=np.float32)
+        self.Y = np.zeros((self.nr_init, 1), dtype=np.float32)
         for i in range(self.nr_init):
             self.policy_model.random_policy()
@@ -127,29 +127,29 @@ class BayesianOptimization:
         reward, step_count = self.runner(policy, seed=seed)
-        self.X = np.vstack((self.X, x_next))
-        self.Y = np.vstack((self.Y, reward))
+        self.X = np.vstack((self.X, x_next), dtype=np.float32)
+        self.Y = np.vstack((self.Y, reward), dtype=np.float32)
         self.GP.fit(self.X, self.Y)
         if self.episode == 0:
-            self.best_reward[0] = max(self.Y)
+            self.best_reward[0] = np.max(self.Y)
         else:
-            self.best_reward = np.vstack((self.best_reward, max(self.Y)))
+            self.best_reward = np.vstack((self.best_reward, np.max(self.Y)), dtype=np.float32)
         self.episode += 1
         return step_count

     def add_new_observation(self, reward, x_new):
-        self.X = np.vstack((self.X, x_new))
-        self.Y = np.vstack((self.Y, reward))
+        self.X = np.vstack((self.X, x_new), dtype=np.float32)
+        self.Y = np.vstack((self.Y, reward), dtype=np.float32)
         self.GP.fit(self.X, self.Y)
         if self.episode == 0:
-            self.best_reward[0] = max(self.Y)
+            self.best_reward[0] = np.max(self.Y)
         else:
-            self.best_reward = np.vstack((self.best_reward, max(self.Y)))
+            self.best_reward = np.vstack((self.best_reward, np.max(self.Y)), dtype=np.float32)
         self.episode += 1
@@ -161,4 +161,4 @@ class BayesianOptimization:
         self.policy_model.weights = x_max
         best_policy = self.policy_model.rollout().reshape(-1,)
-        return best_policy, y_max, x_max
+        return best_policy, y_max, x_max, idx

View File

@@ -57,6 +57,7 @@ class ActiveBOTopic(Node):
         self.current_run = 0
         self.current_episode = 0
         self.seed = None
+        self.seed_array = None
         self.save_result = False

         # Active Reinforcement Learning Publisher, Subscriber and Message attributes
@@ -106,6 +107,7 @@ class ActiveBOTopic(Node):
         self.current_run = 0
         self.current_episode = 0
         self.save_result = False
+        self.seed_array = None

     def active_bo_callback(self, msg):
         if not self.active_bo_pending:
@@ -121,6 +123,7 @@ class ActiveBOTopic(Node):
             self.bo_acq_fcn = msg.acquisition_function
             self.bo_metric_parameter = msg.metric_parameter
             self.save_result = msg.save_result
+            self.seed_array = np.zeros((1, self.bo_runs))

             # initialize
             self.reward = np.zeros((self.bo_episodes, self.bo_runs))
@@ -131,6 +134,7 @@ class ActiveBOTopic(Node):
             # set the seed
             if self.bo_fixed_seed:
                 self.seed = int(np.random.randint(1, 2147483647, 1)[0])
+                self.get_logger().info(str(self.seed))
             else:
                 self.seed = None
@@ -185,6 +189,10 @@ class ActiveBOTopic(Node):
             bo_response.best_policy = self.best_policy[:, best_policy_idx].tolist()
             bo_response.best_weights = self.best_weights[:, best_policy_idx].tolist()

+            self.get_logger().info(f'Best Policy: {self.best_pol_reward}')
+            self.get_logger().info(f'{best_policy_idx}, {int(self.seed_array[0, best_policy_idx])}')
+
             bo_response.reward_mean = np.mean(self.reward, axis=1).tolist()
             bo_response.reward_std = np.std(self.reward, axis=1).tolist()
@@ -217,14 +225,17 @@ class ActiveBOTopic(Node):
                 filename = filename.replace('.', '_') + '.csv'
                 path = os.path.join(file_path, filename)
-                np.savetxt(path, self.reward, delimiter=',')
+                data = self.reward
+                np.savetxt(path, data, delimiter=',')

             active_rl_request = ActiveRL()
-            if self.seed is None:
-                seed = int(np.random.randint(1, 2147483647, 1)[0])
+            if self.bo_fixed_seed:
+                seed = int(self.seed_array[0, best_policy_idx])
+                self.get_logger().info(f'Used seed{seed}')
             else:
-                seed = self.seed
+                seed = int(np.random.randint(1, 2147483647, 1)[0])

             active_rl_request.env = self.bo_env
             active_rl_request.seed = seed
@@ -232,7 +243,6 @@ class ActiveBOTopic(Node):
             active_rl_request.weights = self.best_weights[:, best_policy_idx].tolist()
             active_rl_request.final_run = True

-            self.get_logger().info('Calling: Active RL')
             self.active_rl_pub.publish(active_rl_request)

             self.get_logger().info('Responding: Active BO')
@@ -269,12 +279,15 @@ class ActiveBOTopic(Node):
                 if user_query.query():
                     active_rl_request = ActiveRL()
-                    old_policy, _, old_weights = self.BO.get_best_result()
+                    old_policy, y_max, old_weights, _ = self.BO.get_best_result()
+                    self.get_logger().info(f'Best: {y_max}, w:{old_weights}')
+                    self.get_logger().info(f'Size of Y: {self.BO.Y.shape}, Size of X: {self.BO.X.shape}')

-                    if self.seed is None:
-                        seed = int(np.random.randint(1, 2147483647, 1)[0])
-                    else:
+                    if self.bo_fixed_seed:
                         seed = self.seed
+                    else:
+                        seed = int(np.random.randint(1, 2147483647, 1)[0])

                     active_rl_request.env = self.bo_env
                     active_rl_request.seed = seed
@@ -291,17 +304,23 @@ class ActiveBOTopic(Node):
                     self.BO.eval_new_observation(x_next)

                 self.current_episode += 1
-                self.get_logger().info(f'Current Episode: {self.current_episode}')
+                # self.get_logger().info(f'Current Episode: {self.current_episode}')
             else:
                 self.best_policy[:, self.current_run], \
                     self.best_pol_reward[:, self.current_run], \
-                    self.best_weights[:, self.current_run] = self.BO.get_best_result()
+                    self.best_weights[:, self.current_run], idx = self.BO.get_best_result()
+
+                self.get_logger().info(f'best idx: {idx}')

                 self.reward[:, self.current_run] = self.BO.best_reward.T
                 self.BO = None
                 self.current_episode = 0
+
+                if self.bo_fixed_seed:
+                    self.seed_array[0, self.current_run] = self.seed
+                    self.seed = int(np.random.randint(1, 2147483647, 1)[0])
+                    self.get_logger().info(f'{self.seed}')
+
                 self.current_run += 1
                 self.get_logger().info(f'Current Run: {self.current_run}')
@@ -314,8 +333,8 @@ class ActiveBOTopic(Node):
         state_msg.current_run = self.current_run + 1 if self.current_run < self.bo_runs else self.bo_runs
         state_msg.current_episode = self.current_episode + 1 \
             if self.current_episode < self.bo_episodes else self.bo_episodes
-        state_msg.best_reward = self.best_reward
-        state_msg.last_user_reward = self.rl_reward
+        state_msg.best_reward = float(self.best_reward)
+        state_msg.last_user_reward = float(self.rl_reward)
         self.state_pub.publish(state_msg)