debugging regular
parent 3a8acb6807
commit 355165e804
@@ -71,8 +71,8 @@ class BayesianOptimization:
         self.env.reset(seed=seed)
         self.reset_bo()
 
-        self.X = np.zeros((self.nr_init, self.nr_policy_weights))
-        self.Y = np.zeros((self.nr_init, 1))
+        self.X = np.zeros((self.nr_init, self.nr_policy_weights), dtype=np.float32)
+        self.Y = np.zeros((self.nr_init, 1), dtype=np.float32)
 
         for i in range(self.nr_init):
             self.policy_model.random_policy()

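Note on the hunk above: np.zeros defaults to float64, so preallocating X and Y with dtype=np.float32 keeps the data that is later stacked and fed to the GP in one consistent, smaller dtype. A minimal illustration in plain NumPy (illustrative only, not project code):

    import numpy as np

    np.zeros((3, 2)).dtype                    # dtype('float64'), NumPy's default
    np.zeros((3, 2), dtype=np.float32).dtype  # dtype('float32'), matching the new allocation
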
@@ -127,29 +127,29 @@ class BayesianOptimization:
 
         reward, step_count = self.runner(policy, seed=seed)
 
-        self.X = np.vstack((self.X, x_next))
-        self.Y = np.vstack((self.Y, reward))
+        self.X = np.vstack((self.X, x_next), dtype=np.float32)
+        self.Y = np.vstack((self.Y, reward), dtype=np.float32)
 
         self.GP.fit(self.X, self.Y)
 
         if self.episode == 0:
-            self.best_reward[0] = max(self.Y)
+            self.best_reward[0] = np.max(self.Y)
         else:
-            self.best_reward = np.vstack((self.best_reward, max(self.Y)))
+            self.best_reward = np.vstack((self.best_reward, np.max(self.Y)), dtype=np.float32)
 
         self.episode += 1
         return step_count
 
     def add_new_observation(self, reward, x_new):
-        self.X = np.vstack((self.X, x_new))
-        self.Y = np.vstack((self.Y, reward))
+        self.X = np.vstack((self.X, x_new), dtype=np.float32)
+        self.Y = np.vstack((self.Y, reward), dtype=np.float32)
 
         self.GP.fit(self.X, self.Y)
 
         if self.episode == 0:
-            self.best_reward[0] = max(self.Y)
+            self.best_reward[0] = np.max(self.Y)
         else:
-            self.best_reward = np.vstack((self.best_reward, max(self.Y)))
+            self.best_reward = np.vstack((self.best_reward, np.max(self.Y)), dtype=np.float32)
 
         self.episode += 1

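Two details in the hunk above are worth spelling out. First, self.Y has shape (n, 1) and Python's built-in max iterates over the rows of a 2-D array, so max(self.Y) yields a length-1 array rather than a number, while np.max reduces over all elements and returns a scalar. A small sketch in plain NumPy (illustrative only, not project code):

    import numpy as np

    Y = np.array([[1.0], [3.0], [2.0]], dtype=np.float32)  # shape (n, 1), like self.Y
    max(Y)     # built-in max compares rows: array([3.], dtype=float32)
    np.max(Y)  # reduces over every element: 3.0

Second, the dtype keyword of np.vstack is a fairly recent NumPy addition (1.24 or later, as far as I recall); on older releases the same effect needs an explicit .astype(np.float32) on the stacked result.
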
@@ -161,4 +161,4 @@ class BayesianOptimization:
         self.policy_model.weights = x_max
         best_policy = self.policy_model.rollout().reshape(-1,)
 
-        return best_policy, y_max, x_max
+        return best_policy, y_max, x_max, idx

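Since get_best_result now returns four values (the index of the best observation is appended), every caller that unpacked three values has to be adjusted; the ActiveBOTopic hunks further down do exactly that, discarding the index where it is not needed:

    old_policy, y_max, old_weights, _ = self.BO.get_best_result()
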
@@ -57,6 +57,7 @@ class ActiveBOTopic(Node):
         self.current_run = 0
         self.current_episode = 0
         self.seed = None
+        self.seed_array = None
         self.save_result = False
 
         # Active Reinforcement Learning Publisher, Subscriber and Message attributes

@@ -106,6 +107,7 @@ class ActiveBOTopic(Node):
         self.current_run = 0
         self.current_episode = 0
         self.save_result = False
+        self.seed_array = None
 
     def active_bo_callback(self, msg):
         if not self.active_bo_pending:

@@ -121,6 +123,7 @@ class ActiveBOTopic(Node):
             self.bo_acq_fcn = msg.acquisition_function
             self.bo_metric_parameter = msg.metric_parameter
             self.save_result = msg.save_result
+            self.seed_array = np.zeros((1, self.bo_runs))
 
             # initialize
             self.reward = np.zeros((self.bo_episodes, self.bo_runs))

@@ -131,6 +134,7 @@ class ActiveBOTopic(Node):
             # set the seed
             if self.bo_fixed_seed:
                 self.seed = int(np.random.randint(1, 2147483647, 1)[0])
+                self.get_logger().info(str(self.seed))
             else:
                 self.seed = None
 
@@ -185,6 +189,10 @@ class ActiveBOTopic(Node):
             bo_response.best_policy = self.best_policy[:, best_policy_idx].tolist()
             bo_response.best_weights = self.best_weights[:, best_policy_idx].tolist()
 
+            self.get_logger().info(f'Best Policy: {self.best_pol_reward}')
+
+            self.get_logger().info(f'{best_policy_idx}, {int(self.seed_array[0, best_policy_idx])}')
+
             bo_response.reward_mean = np.mean(self.reward, axis=1).tolist()
             bo_response.reward_std = np.std(self.reward, axis=1).tolist()
 
@@ -217,14 +225,17 @@ class ActiveBOTopic(Node):
             filename = filename.replace('.', '_') + '.csv'
             path = os.path.join(file_path, filename)
 
-            np.savetxt(path, self.reward, delimiter=',')
+            data = self.reward
+
+            np.savetxt(path, data, delimiter=',')
 
             active_rl_request = ActiveRL()
 
-            if self.seed is None:
-                seed = int(np.random.randint(1, 2147483647, 1)[0])
+            if self.bo_fixed_seed:
+                seed = int(self.seed_array[0, best_policy_idx])
+                self.get_logger().info(f'Used seed{seed}')
             else:
-                seed = self.seed
+                seed = int(np.random.randint(1, 2147483647, 1)[0])
 
             active_rl_request.env = self.bo_env
             active_rl_request.seed = seed

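Note: the final best-policy rollout no longer branches on whether self.seed is None. When bo_fixed_seed is set, the seed the winning run was actually trained with is read back from seed_array, so the replayed rollout sees the same environment seed; otherwise a fresh random seed is drawn.
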
@@ -232,7 +243,6 @@ class ActiveBOTopic(Node):
             active_rl_request.weights = self.best_weights[:, best_policy_idx].tolist()
             active_rl_request.final_run = True
 
-            self.get_logger().info('Calling: Active RL')
             self.active_rl_pub.publish(active_rl_request)
 
             self.get_logger().info('Responding: Active BO')

@@ -269,12 +279,15 @@ class ActiveBOTopic(Node):
 
             if user_query.query():
                 active_rl_request = ActiveRL()
-                old_policy, _, old_weights = self.BO.get_best_result()
+                old_policy, y_max, old_weights, _ = self.BO.get_best_result()
 
-                if self.seed is None:
-                    seed = int(np.random.randint(1, 2147483647, 1)[0])
-                else:
+                self.get_logger().info(f'Best: {y_max}, w:{old_weights}')
+                self.get_logger().info(f'Size of Y: {self.BO.Y.shape}, Size of X: {self.BO.X.shape}')
+
+                if self.bo_fixed_seed:
                     seed = self.seed
+                else:
+                    seed = int(np.random.randint(1, 2147483647, 1)[0])
 
                 active_rl_request.env = self.bo_env
                 active_rl_request.seed = seed

@@ -291,17 +304,23 @@ class ActiveBOTopic(Node):
                 self.BO.eval_new_observation(x_next)
 
                 self.current_episode += 1
-                self.get_logger().info(f'Current Episode: {self.current_episode}')
+                # self.get_logger().info(f'Current Episode: {self.current_episode}')
             else:
                 self.best_policy[:, self.current_run], \
                     self.best_pol_reward[:, self.current_run], \
-                    self.best_weights[:, self.current_run] = self.BO.get_best_result()
+                    self.best_weights[:, self.current_run], idx = self.BO.get_best_result()
+
+                self.get_logger().info(f'best idx: {idx}')
 
                 self.reward[:, self.current_run] = self.BO.best_reward.T
 
                 self.BO = None
 
                 self.current_episode = 0
+                if self.bo_fixed_seed:
+                    self.seed_array[0, self.current_run] = self.seed
+                    self.seed = int(np.random.randint(1, 2147483647, 1)[0])
+                    self.get_logger().info(f'{self.seed}')
                 self.current_run += 1
                 self.get_logger().info(f'Current Run: {self.current_run}')
 
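Note: the seed bookkeeping added here is what makes that replay possible. When bo_fixed_seed is set, the seed used for the finished run is stored in seed_array[0, current_run] and a new seed is drawn before the next run starts.
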
@@ -314,8 +333,8 @@ class ActiveBOTopic(Node):
         state_msg.current_run = self.current_run + 1 if self.current_run < self.bo_runs else self.bo_runs
         state_msg.current_episode = self.current_episode + 1 \
             if self.current_episode < self.bo_episodes else self.bo_episodes
-        state_msg.best_reward = self.best_reward
-        state_msg.last_user_reward = self.rl_reward
+        state_msg.best_reward = float(self.best_reward)
+        state_msg.last_user_reward = float(self.rl_reward)
         self.state_pub.publish(state_msg)
 
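Note on the float(...) casts: rosidl-generated Python messages type-check assignments to their fields, so fields such as best_reward and last_user_reward (presumably declared as floating-point types in the .msg definition) will not accept a NumPy array; casting with float(...) guarantees a plain Python scalar is published.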