diff --git a/src/active_bo_ros/active_bo_ros/ReinforcementLearning/ContinuousMountainCar.py b/src/active_bo_ros/active_bo_ros/ReinforcementLearning/ContinuousMountainCar.py
index cf6e755..4faeb23 100644
--- a/src/active_bo_ros/active_bo_ros/ReinforcementLearning/ContinuousMountainCar.py
+++ b/src/active_bo_ros/active_bo_ros/ReinforcementLearning/ContinuousMountainCar.py
@@ -173,9 +173,7 @@ class Continuous_MountainCarEnv(gym.Env):
 
         if self.render_mode == "human":
             self.render()
-        elif self.render_mode == "rgb_array":
-            rgb_array = self.render()
-            return self.state, reward, terminated, False, {}, rgb_array
+
         return self.state, reward, terminated, False, {}
 
     def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
@@ -187,9 +185,6 @@ class Continuous_MountainCarEnv(gym.Env):
 
         if self.render_mode == "human":
             self.render()
-        elif self.render_mode == "rgb_array":
-            rgb_array = self.render()
-            return np.array(self.state, dtype=np.float32), {}, rgb_array
         return np.array(self.state, dtype=np.float32), {}
 
     def _height(self, xs):
diff --git a/src/active_bo_ros/active_bo_ros/rl_service.py b/src/active_bo_ros/active_bo_ros/rl_service.py
index bf6d7b8..6934262 100644
--- a/src/active_bo_ros/active_bo_ros/rl_service.py
+++ b/src/active_bo_ros/active_bo_ros/rl_service.py
@@ -37,13 +37,19 @@ class RLService(Node):
 
             done = output[2]
             step_count += 1
 
-            rgb_array = output[5]
+            rgb_array = self.env.render()
             rgb_shape = rgb_array.shape
 
             red = rgb_array[:, :, 0].flatten().tolist()
             green = rgb_array[:, :, 1].flatten().tolist()
             blue = rgb_array[:, :, 2].flatten().tolist()
+
+            # random int data
+            # red = np.random.randint(0, 255, 240000).tolist()
+            # green = np.random.randint(0, 255, 240000).tolist()
+            # blue = np.random.randint(0, 255, 240000).tolist()
+
             feedback_msg.height = rgb_shape[0]
             feedback_msg.width = rgb_shape[1]
             feedback_msg.red = red
@@ -58,7 +64,7 @@ class RLService(Node):
             distance = -(self.env.goal_position - output[0][0])
             reward += distance * self.distance_penalty
 
-            time.sleep(0.01)
+            # time.sleep(0.01)
 
         response.reward = reward
         response.final_step = step_count
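
Note: with this patch, step() returns the standard Gym 5-tuple and reset() the
standard 2-tuple again; callers that want frames now request them explicitly
via env.render(), which is exactly what rl_service.py does in place of the old
output[5]. A minimal sketch of the new calling convention, assuming the
environment accepts a render_mode constructor argument as in upstream Gym; the
round-trip check at the end mirrors the channel flattening done in RLService:

    import numpy as np

    from active_bo_ros.ReinforcementLearning.ContinuousMountainCar import (
        Continuous_MountainCarEnv,
    )

    env = Continuous_MountainCarEnv(render_mode="rgb_array")
    state, info = env.reset()
    state, reward, terminated, truncated, info = env.step(env.action_space.sample())
    rgb_array = env.render()  # frame is fetched separately from the step result

    # Flatten each channel as rl_service.py does, then rebuild the image.
    h, w = rgb_array.shape[:2]
    channels = [rgb_array[:, :, i].flatten().tolist() for i in range(3)]
    rebuilt = np.stack(
        [np.asarray(c, dtype=np.uint8).reshape(h, w) for c in channels], axis=-1
    )
    assert (rebuilt == rgb_array).all()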