diff --git a/src/active_bo_msgs/msg/ActiveBORequest.msg b/src/active_bo_msgs/msg/ActiveBORequest.msg
index 61c61ef..6cdc248 100644
--- a/src/active_bo_msgs/msg/ActiveBORequest.msg
+++ b/src/active_bo_msgs/msg/ActiveBORequest.msg
@@ -1,4 +1,5 @@
 string env
+bool fixed_seed
 string metric
 uint16 nr_weights
 uint16 max_steps
diff --git a/src/active_bo_msgs/msg/ActiveRL.msg b/src/active_bo_msgs/msg/ActiveRL.msg
index 23b086f..ae4e51c 100644
--- a/src/active_bo_msgs/msg/ActiveRL.msg
+++ b/src/active_bo_msgs/msg/ActiveRL.msg
@@ -1,3 +1,4 @@
 string env
+uint32 seed
 float32[] policy
 float32[] weights
\ No newline at end of file
diff --git a/src/active_bo_ros/active_bo_ros/ReinforcementLearning/Acrobot.py b/src/active_bo_ros/active_bo_ros/ReinforcementLearning/Acrobot.py
index a3d3b6e..37a0ee7 100644
--- a/src/active_bo_ros/active_bo_ros/ReinforcementLearning/Acrobot.py
+++ b/src/active_bo_ros/active_bo_ros/ReinforcementLearning/Acrobot.py
@@ -196,7 +196,7 @@ class AcrobotEnv(core.Env):
     def step(self, a):
         s = self.state
         assert s is not None, "Call reset before using AcrobotEnv object."
-        torque = self.AVAIL_TORQUE[a]
+        torque = a
 
         # Add noise to the force action
         if self.torque_noise_max > 0:
diff --git a/src/active_bo_ros/active_bo_ros/ReinforcementLearning/Pendulum.py b/src/active_bo_ros/active_bo_ros/ReinforcementLearning/Pendulum.py
index 79b7665..6ab4992 100644
--- a/src/active_bo_ros/active_bo_ros/ReinforcementLearning/Pendulum.py
+++ b/src/active_bo_ros/active_bo_ros/ReinforcementLearning/Pendulum.py
@@ -227,8 +227,19 @@ class PendulumEnv(gym.Env):
             self.surf, rod_end[0], rod_end[1], int(rod_width / 2), (204, 77, 77)
         )
 
-        fname = path.join(path.dirname(__file__), "../../resource/clockwise.png")
+        try:
+            import ament_index_python
+        except ImportError:
+            raise DependencyNotInstalled(
+                "ament_index_python is not installed"
+            )
+
+        package_name = 'active_bo_ros'
+
+        package_path = ament_index_python.get_package_share_directory(package_name)
+        fname = path.join(package_path, 'assets', 'clockwise.png')
         img = pygame.image.load(fname)
+
         if self.last_u is not None:
             scale_img = pygame.transform.smoothscale(
                 img,
diff --git a/src/active_bo_ros/active_bo_ros/active_bo_topic.py b/src/active_bo_ros/active_bo_ros/active_bo_topic.py
index 5dc1a2f..a5fa7c5 100644
--- a/src/active_bo_ros/active_bo_ros/active_bo_topic.py
+++ b/src/active_bo_ros/active_bo_ros/active_bo_topic.py
@@ -64,7 +64,6 @@ class ActiveBOTopic(Node):
 
         # RL Environments and BO
         self.env = None
-        self.distance_penalty = 0
 
         self.BO = None
         self.nr_init = 3
diff --git a/src/active_bo_ros/active_bo_ros/active_rl_topic.py b/src/active_bo_ros/active_bo_ros/active_rl_topic.py
index 582d7a6..dc7a565 100644
--- a/src/active_bo_ros/active_bo_ros/active_rl_topic.py
+++ b/src/active_bo_ros/active_bo_ros/active_rl_topic.py
@@ -65,7 +65,6 @@ class ActiveRLService(Node):
 
         # RL Environments
         self.env = None
-        self.distance_penalty = 0
         self.best_pol_shown = False
 
         # Main loop timer object
@@ -114,11 +113,8 @@
     def next_image(self, policy):
         action = policy[self.rl_step]
         action_clipped = action.clip(min=-1.0, max=1.0)
-        self.get_logger().info(str(action_clipped) + str(type(action_clipped)))
         output = self.env.step(action_clipped.astype(np.float32))
-        self.get_logger().info(str(output))
-
         self.rl_reward += output[1]
         done = output[2]
         self.rl_step += 1
 
@@ -141,8 +137,6 @@
             self.image_pub.publish(feedback_msg)
 
         if not done and self.rl_step == len(policy):
-            distance = -(self.env.goal_position - output[0][0])
-            self.rl_reward += distance * self.distance_penalty
             done = True
 
         return done
diff --git a/src/active_bo_ros/active_bo_ros/rl_service.py b/src/active_bo_ros/active_bo_ros/rl_service.py
index e259384..2c70046 100644
--- a/src/active_bo_ros/active_bo_ros/rl_service.py
+++ b/src/active_bo_ros/active_bo_ros/rl_service.py
@@ -20,7 +20,6 @@ class RLService(Node):
 
         self.publisher = self.create_publisher(ImageFeedback, 'rl_feedback', 1)
 
         self.env = None
-        self.distance_penalty = 0
 
     def rl_callback(self, request, response):
@@ -47,7 +46,6 @@
         for i in range(len(policy)):
             action = policy[i]
             action_clipped = action.clip(min=-1.0, max=1.0)
-            self.get_logger().info(str(action_clipped) + str(type(action_clipped)))
             output = self.env.step(action_clipped.astype(np.float32))
 
             reward += output[1]
@@ -72,10 +70,6 @@
 
             if done:
                 break
 
-        if not done and i == len(policy):
-            distance = -(self.env.goal_position - output[0][0])
-            reward += distance * self.distance_penalty
-
         response.reward = reward
         response.final_step = step_count
diff --git a/src/active_bo_ros/resource/clockwise.png b/src/active_bo_ros/assets/clockwise.png
similarity index 100%
rename from src/active_bo_ros/resource/clockwise.png
rename to src/active_bo_ros/assets/clockwise.png
diff --git a/src/active_bo_ros/package.xml b/src/active_bo_ros/package.xml
index 2cbe236..b79b01d 100644
--- a/src/active_bo_ros/package.xml
+++ b/src/active_bo_ros/package.xml
@@ -18,5 +18,6 @@
 
   <export>
     <build_type>ament_python</build_type>
   </export>
+  <exec_depend>ament_index_python</exec_depend>
 </package>
diff --git a/src/active_bo_ros/setup.py b/src/active_bo_ros/setup.py
index 7e87a25..5e1e7ce 100644
--- a/src/active_bo_ros/setup.py
+++ b/src/active_bo_ros/setup.py
@@ -17,6 +17,7 @@ setup(
             ['resource/' + package_name]),
         ('share/' + package_name, ['package.xml']),
         (os.path.join('share', package_name), glob('launch/*.launch.py')),
+        ('share/' + package_name + '/assets', ['assets/clockwise.png']),
     ],
     install_requires=['setuptools', 'gym', 'numpy'],
     zip_safe=True,
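
Two sketches below illustrate the patterns this diff introduces; neither is code from this repository.

The Pendulum.py hunk stops resolving clockwise.png relative to __file__, which breaks once colcon installs the module away from the source tree, and instead asks the ament index for the package's share directory. A minimal sketch of that lookup, where the helper name find_asset is ours while get_package_share_directory and PackageNotFoundError come from ament_index_python:

    from os import path

    from ament_index_python.packages import (
        PackageNotFoundError,
        get_package_share_directory,
    )


    def find_asset(package_name, *relative_parts):
        """Resolve a file installed under share/<package_name>/."""
        try:
            share_dir = get_package_share_directory(package_name)
        except PackageNotFoundError as exc:
            # The package must be built and sourced for the index lookup to work.
            raise FileNotFoundError(
                f"package '{package_name}' is not in the ament index"
            ) from exc
        return path.join(share_dir, *relative_parts)


    # e.g. the arrow image this diff moves from resource/ to assets/:
    # find_asset('active_bo_ros', 'assets', 'clockwise.png')

This only works at runtime because the new data_files entry in setup.py installs the image into share/active_bo_ros/assets.

The message changes (bool fixed_seed on ActiveBORequest, uint32 seed on ActiveRL) point at seed plumbing between the BO and RL nodes: the BO side either pins a seed or draws a fresh one per episode and forwards it so both sides can reset the environment identically. A hedged sketch of that choice, where pick_seed and the pinned constant are hypothetical:

    import numpy as np

    rng = np.random.default_rng()


    def pick_seed(request, pinned_seed=42):
        """Choose the seed to copy into the outgoing ActiveRL message."""
        if request.fixed_seed:
            # Illustrative constant; the repo may pin the seed differently.
            return pinned_seed
        return int(rng.integers(0, 2**32))  # fits the uint32 'seed' field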